diff --git a/.github/workflows/cla.yaml b/.github/workflows/cla.yaml
index 8e0275169fb..f211f20e3d2 100644
--- a/.github/workflows/cla.yaml
+++ b/.github/workflows/cla.yaml
@@ -27,4 +27,4 @@ jobs:
# branch should not be protected
branch: 'cla'
# cannot use teams due to: https://github.com/contributor-assistant/github-action/issues/100
- allowlist: actions-user, altay, dannygoldstein, davidwallacejackson, jamie-rasmussen, jlzhao27, jo-fang, jwlee64, laxels, morganmcg1, nickpenaranda, scottire, shawnlewis, staceysv, tssweeney, vanpelt, vwrj, wandbmachine
+ allowlist: actions-user, altay, bdytx5, dannygoldstein, davidwallacejackson, jamie-rasmussen, jlzhao27, jo-fang, jwlee64, laxels, morganmcg1, nickpenaranda, scottire, shawnlewis, staceysv, tssweeney, vanpelt, vwrj, wandbmachine
diff --git a/docs/docs/guides/cookbooks/summarization/.gitignore b/docs/docs/guides/cookbooks/summarization/.gitignore
new file mode 100644
index 00000000000..8b137891791
--- /dev/null
+++ b/docs/docs/guides/cookbooks/summarization/.gitignore
@@ -0,0 +1 @@
+
diff --git a/docs/docs/guides/tracking/feedback.md b/docs/docs/guides/tracking/feedback.md
index 645c1399f17..ffbff6532c3 100644
--- a/docs/docs/guides/tracking/feedback.md
+++ b/docs/docs/guides/tracking/feedback.md
@@ -71,6 +71,40 @@ call.feedback.add_note("this is a note")
call.feedback.add("correctness", { "value": 5 })
```
+### Retrieving the call ID
+
+For scenarios where you need to add feedback immediately after a call, you can retrieve the call ID programmatically during or after the call execution. Here is how to get the ID of the current call from within the operation:
+
+```python
+import weave
+weave.init("uuid")
+
+@weave.op()
+def simple_operation(input_value):
+ # Perform some simple operation
+ output = f"Processed {input_value}"
+ # Get the current call ID
+ current_call = weave.get_current_call()
+ call_id = current_call.id
+ return output, call_id
+```
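+
+Calling this op then returns both the output and the call ID, which you can pass to whatever records your feedback (a quick sketch):
+
+```python
+output, call_id = simple_operation("example input")
+```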
+
+Additionally, you can use the `call()` method to execute the operation and retrieve the call ID after the function has executed:
+
+```python
+import weave
+weave.init("uuid")
+
+@weave.op()
+def simple_operation(input_value):
+ return f"Processed {input_value}"
+
+# Execute the operation and retrieve the result and call ID
+result, call = simple_operation.call("example input")
+call_id = call.id
+```
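+
+Because the `call` object is returned directly, you can attach feedback to it right away using the same APIs shown earlier (a minimal sketch; the note text and score are illustrative):
+
+```python
+call.feedback.add_note("looks correct")
+call.feedback.add("correctness", { "value": 5 })
+```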
+
### Querying feedback on a call
```python
diff --git a/docs/docs/reference/gen_notebooks/intro_notebook.md b/docs/docs/reference/gen_notebooks/01-intro_notebook.md
similarity index 100%
rename from docs/docs/reference/gen_notebooks/intro_notebook.md
rename to docs/docs/reference/gen_notebooks/01-intro_notebook.md
diff --git a/docs/docs/reference/gen_notebooks/chain_of_density.md b/docs/docs/reference/gen_notebooks/chain_of_density.md
new file mode 100644
index 00000000000..caa9e6da805
--- /dev/null
+++ b/docs/docs/reference/gen_notebooks/chain_of_density.md
@@ -0,0 +1,381 @@
+---
+title: Chain of Density Summarization
+---
+
+
+:::tip[This is a notebook]
+
+
+
+:::
+
+
+
+
+
+
+# Summarization using Chain of Density
+
+Summarizing complex technical documents while preserving crucial details is a challenging task. The Chain of Density (CoD) summarization technique offers a solution by iteratively refining summaries to be more concise and information-dense. This guide demonstrates how to implement CoD using Weave for tracking and evaluating the application.
+
+## What is Chain of Density Summarization?
+
+[![arXiv](https://img.shields.io/badge/arXiv-2309.04269-b31b1b.svg)](https://arxiv.org/abs/2309.04269)
+
+Chain of Density (CoD) is an iterative summarization technique that produces increasingly concise and information-dense summaries. It works by:
+
+1. Starting with an initial summary
+2. Iteratively refining the summary, making it more concise while preserving key information
+3. Increasing the density of entities and technical details with each iteration
+
+This approach is particularly useful for summarizing scientific papers or technical documents where preserving detailed information is crucial.
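+
+In pseudocode, the core loop is simply the following (a minimal sketch; `summarize_step` is a placeholder for the LLM call we implement later in this guide):
+
+```python
+def chain_of_density(document: str, instruction: str, iterations: int = 3) -> str:
+    summary = ""  # start from an empty summary
+    for i in range(1, iterations + 1):
+        # each pass rewrites the summary to be shorter and more entity-dense
+        summary = summarize_step(document, instruction, summary, iteration=i)
+    return summary
+```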
+
+## Why use Weave?
+
+In this tutorial, we'll use Weave to implement and evaluate a Chain of Density summarization pipeline for ArXiv papers. You'll learn how to:
+
+1. **Track your LLM pipeline**: Use Weave to automatically log inputs, outputs, and intermediate steps of your summarization process.
+2. **Evaluate LLM outputs**: Create rigorous, apples-to-apples evaluations of your summaries using Weave's built-in tools.
+3. **Build composable operations**: Combine and reuse Weave operations across different parts of your summarization pipeline.
+4. **Integrate seamlessly**: Add Weave to your existing Python code with minimal overhead.
+
+By the end of this tutorial, you'll have created a CoD summarization pipeline that leverages Weave's capabilities for model serving, evaluation, and result tracking.
+
+## Set up the environment
+
+First, let's set up our environment and import the necessary libraries:
+
+
+```python
+!pip install -qU anthropic weave pydantic requests PyPDF2 set-env-colab-kaggle-dotenv
+```
+
+>To get an Anthropic API key:
+> 1. Sign up for an account at https://www.anthropic.com
+> 2. Navigate to the API section in your account settings
+> 3. Generate a new API key
+> 4. Store the API key securely in your .env file
+
+
+```python
+import io
+import os
+from datetime import datetime, timezone
+
+import anthropic
+import requests
+from pydantic import BaseModel
+from PyPDF2 import PdfReader
+from set_env import set_env
+
+import weave
+
+set_env("WANDB_API_KEY")
+set_env("ANTHROPIC_API_KEY")
+
+weave.init("summarization-chain-of-density-cookbook")
+anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+```
+
+We're using Weave to track our experiment and Anthropic's Claude model for text generation. The `weave.init()` call sets up a new Weave project for our summarization task.
+
+## Define the ArxivPaper model
+
+We'll create a simple `ArxivPaper` class to represent our data:
+
+
+```python
+# Define ArxivPaper model
+class ArxivPaper(BaseModel):
+ entry_id: str
+ updated: datetime
+ published: datetime
+ title: str
+ authors: list[str]
+ summary: str
+ pdf_url: str
+
+
+# Create sample ArxivPaper
+arxiv_paper = ArxivPaper(
+ entry_id="http://arxiv.org/abs/2406.04744v1",
+ updated=datetime(2024, 6, 7, 8, 43, 7, tzinfo=timezone.utc),
+ published=datetime(2024, 6, 7, 8, 43, 7, tzinfo=timezone.utc),
+ title="CRAG -- Comprehensive RAG Benchmark",
+ authors=["Xiao Yang", "Kai Sun", "Hao Xin"], # Truncated for brevity
+ summary="Retrieval-Augmented Generation (RAG) has recently emerged as a promising solution...", # Truncated
+ pdf_url="https://arxiv.org/pdf/2406.04744",
+)
+```
+
+This class encapsulates the metadata and content of an ArXiv paper, which will be the input to our summarization pipeline.
+
+## Load PDF content
+
+To work with the full paper content, we'll add a function to load and extract text from PDFs:
+
+
+```python
+@weave.op()
+def load_pdf(pdf_url: str) -> str:
+ # Download the PDF
+ response = requests.get(pdf_url)
+ pdf_file = io.BytesIO(response.content)
+
+ # Read the PDF
+ pdf_reader = PdfReader(pdf_file)
+
+ # Extract text from all pages
+ text = ""
+ for page in pdf_reader.pages:
+ text += page.extract_text()
+
+ return text
+```
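+
+For example, the sample paper defined above can be loaded with (a quick sketch):
+
+```python
+paper_text = load_pdf(arxiv_paper.pdf_url)
+print(f"Loaded {len(paper_text)} characters")
+```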
+
+## Implement Chain of Density summarization
+
+Now, let's implement the core CoD summarization logic using Weave operations:
+
+
+```python
+# Chain of Density Summarization
+@weave.op()
+def summarize_current_summary(
+ document: str,
+ instruction: str,
+ current_summary: str = "",
+ iteration: int = 1,
+ model: str = "claude-3-sonnet-20240229",
+):
+ prompt = f"""
+ Document: {document}
+ Current summary: {current_summary}
+ Instruction to focus on: {instruction}
+ Iteration: {iteration}
+
+ Generate an increasingly concise, entity-dense, and highly technical summary from the provided document that specifically addresses the given instruction.
+ """
+ response = anthropic_client.messages.create(
+ model=model, max_tokens=4096, messages=[{"role": "user", "content": prompt}]
+ )
+ return response.content[0].text
+
+
+@weave.op()
+def iterative_density_summarization(
+ document: str,
+ instruction: str,
+ current_summary: str,
+ density_iterations: int,
+ model: str = "claude-3-sonnet-20240229",
+):
+ iteration_summaries = []
+ for iteration in range(1, density_iterations + 1):
+ current_summary = summarize_current_summary(
+ document, instruction, current_summary, iteration, model
+ )
+ iteration_summaries.append(current_summary)
+ return current_summary, iteration_summaries
+
+
+@weave.op()
+def final_summary(
+ instruction: str, current_summary: str, model: str = "claude-3-sonnet-20240229"
+):
+ prompt = f"""
+ Given this summary: {current_summary}
+ And this instruction to focus on: {instruction}
+ Create an extremely dense, final summary that captures all key technical information in the most concise form possible, while specifically addressing the given instruction.
+ """
+ return (
+ anthropic_client.messages.create(
+ model=model, max_tokens=4096, messages=[{"role": "user", "content": prompt}]
+ )
+ .content[0]
+ .text
+ )
+
+
+@weave.op()
+def chain_of_density_summarization(
+ document: str,
+ instruction: str,
+ current_summary: str = "",
+ model: str = "claude-3-sonnet-20240229",
+ density_iterations: int = 2,
+):
+ current_summary, iteration_summaries = iterative_density_summarization(
+ document, instruction, current_summary, density_iterations, model
+ )
+ final_summary_text = final_summary(instruction, current_summary, model)
+ return {
+ "final_summary": final_summary_text,
+ "accumulated_summary": current_summary,
+ "iteration_summaries": iteration_summaries,
+ }
+```
+
+Here's what each function does:
+
+- `summarize_current_summary`: Generates a single summary iteration based on the current state.
+- `iterative_density_summarization`: Applies the CoD technique by calling `summarize_current_summary` multiple times.
+- `chain_of_density_summarization`: Orchestrates the entire summarization process and returns the results.
+
+By using `@weave.op()` decorators, we ensure that Weave tracks the inputs, outputs, and execution of these functions.
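+
+To run the chain end to end on the sample paper, the operations compose directly (a sketch; the instruction string is illustrative):
+
+```python
+text = load_pdf(arxiv_paper.pdf_url)
+result = chain_of_density_summarization(
+    text,
+    "What was the approach to experimenting with different data mixtures?",
+    density_iterations=2,
+)
+print(result["final_summary"])
+```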
+
+
+## Create a Weave Model
+
+Now, let's wrap our summarization pipeline in a Weave Model:
+
+
+```python
+# Weave Model
+class ArxivChainOfDensityPipeline(weave.Model):
+ model: str = "claude-3-sonnet-20240229"
+ density_iterations: int = 3
+
+ @weave.op()
+ def predict(self, paper: ArxivPaper, instruction: str) -> dict:
+ text = load_pdf(paper["pdf_url"])
+ result = chain_of_density_summarization(
+ text,
+ instruction,
+ model=self.model,
+ density_iterations=self.density_iterations,
+ )
+ return result
+```
+
+This `ArxivChainOfDensityPipeline` class encapsulates our summarization logic as a Weave Model, providing several key benefits:
+
+1. Automatic experiment tracking: Weave captures inputs, outputs, and parameters for each run of the model.
+2. Versioning: Changes to the model's attributes or code are automatically versioned, creating a clear history of how your summarization pipeline evolves over time.
+3. Reproducibility: The versioning and tracking make it easy to reproduce any previous result or configuration of your summarization pipeline.
+4. Hyperparameter management: Model attributes (like `model` and `density_iterations`) are clearly defined and tracked across different runs, facilitating experimentation.
+5. Integration with Weave ecosystem: Using `weave.Model` allows seamless integration with other Weave tools, such as evaluations and serving capabilities.
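+
+For example, because hyperparameters such as `density_iterations` are ordinary model attributes, you can create pipeline variants without changing any code (a minimal sketch):
+
+```python
+fast_pipeline = ArxivChainOfDensityPipeline(density_iterations=1)
+thorough_pipeline = ArxivChainOfDensityPipeline(density_iterations=5)
+```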
+
+## Implement evaluation metrics
+
+To assess the quality of our summaries, we'll implement simple evaluation metrics:
+
+
+```python
+import json
+
+
+@weave.op()
+def evaluate_summary(
+ summary: str, instruction: str, model: str = "claude-3-sonnet-20240229"
+) -> dict:
+ prompt = f"""
+ Summary: {summary}
+ Instruction: {instruction}
+
+ Evaluate the summary based on the following criteria:
+ 1. Relevance (1-5): How well does the summary address the given instruction?
+ 2. Conciseness (1-5): How concise is the summary while retaining key information?
+ 3. Technical Accuracy (1-5): How accurately does the summary convey technical details?
+
+ Your response MUST be in the following JSON format:
+ {{
+ "relevance": {{
+ "score": ,
+ "explanation": ""
+ }},
+ "conciseness": {{
+ "score": ,
+ "explanation": ""
+ }},
+ "technical_accuracy": {{
+ "score": ,
+ "explanation": ""
+ }}
+ }}
+
+ Ensure that the scores are integers between 1 and 5, and that the explanations are concise.
+ """
+ response = anthropic_client.messages.create(
+ model=model, max_tokens=1000, messages=[{"role": "user", "content": prompt}]
+ )
+ print(response.content[0].text)
+
+ eval_dict = json.loads(response.content[0].text)
+
+ return {
+ "relevance": eval_dict["relevance"]["score"],
+ "conciseness": eval_dict["conciseness"]["score"],
+ "technical_accuracy": eval_dict["technical_accuracy"]["score"],
+ "average_score": sum(eval_dict[k]["score"] for k in eval_dict) / 3,
+ "evaluation_text": response.content[0].text,
+ }
+```
+
+These evaluation functions use the Claude model to assess the quality of the generated summaries based on relevance, conciseness, and technical accuracy.
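+
+As a quick sanity check, the scorer can be called directly on any summary string (a sketch; the summary and instruction below are illustrative placeholders):
+
+```python
+scores = evaluate_summary(
+    summary="CRAG is a benchmark for retrieval-augmented generation ...",
+    instruction="What was the approach to experimenting with different data mixtures?",
+)
+print(scores["average_score"])
+```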
+
+## Create a Weave Dataset and run evaluation
+
+To evaluate our pipeline, we'll create a Weave Dataset and run an evaluation:
+
+
+```python
+# Create a Weave Dataset
+dataset = weave.Dataset(
+ name="arxiv_papers",
+ rows=[
+ {
+ "paper": arxiv_paper,
+ "instruction": "What was the approach to experimenting with different data mixtures?",
+ },
+ ],
+)
+
+weave.publish(dataset)
+```
+
+For our evaluation, we'll use an LLM-as-a-judge approach. This technique involves using a language model to assess the quality of outputs generated by another model or system. It leverages the LLM's understanding and reasoning capabilities to provide nuanced evaluations, especially for tasks where traditional metrics may fall short.
+
+[![arXiv](https://img.shields.io/badge/arXiv-2306.05685-b31b1b.svg)](https://arxiv.org/abs/2306.05685)
+
+
+```python
+# Define the scorer function
+@weave.op()
+def quality_scorer(instruction: str, model_output: dict) -> dict:
+ result = evaluate_summary(model_output["final_summary"], instruction)
+ return result
+```
+
+
+```python
+# Run evaluation
+evaluation = weave.Evaluation(dataset=dataset, scorers=[quality_scorer])
+arxiv_chain_of_density_pipeline = ArxivChainOfDensityPipeline()
+results = await evaluation.evaluate(arxiv_chain_of_density_pipeline)
+```
+
+This code creates a dataset with our sample ArXiv paper, defines a quality scorer, and runs an evaluation of our summarization pipeline.
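+
+If you are running this outside a notebook (for example, from a Python script), wrap the coroutine with `asyncio.run` instead of `await` (a sketch):
+
+```python
+import asyncio
+
+results = asyncio.run(evaluation.evaluate(arxiv_chain_of_density_pipeline))
+```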
+
+## Conclusion
+
+In this example, we've demonstrated how to implement a Chain of Density summarization pipeline for ArXiv papers using Weave. We've shown how to:
+
+1. Create Weave operations for each step of the summarization process
+2. Wrap the pipeline in a Weave Model for easy tracking and evaluation
+3. Implement custom evaluation metrics using Weave operations
+4. Create a dataset and run an evaluation of the pipeline
+
+Weave's seamless integration allows us to track inputs, outputs, and intermediate steps throughout the summarization process, making it easier to debug, optimize, and evaluate our LLM application.
+You can extend this example to handle larger datasets, implement more sophisticated evaluation metrics, or integrate with other LLM workflows.
+
+
+ View Full Report on W&B
+
diff --git a/docs/docs/reference/gen_notebooks/dspy_prompt_optimization.md b/docs/docs/reference/gen_notebooks/dspy_prompt_optimization.md
new file mode 100644
index 00000000000..d39ccbaa14e
--- /dev/null
+++ b/docs/docs/reference/gen_notebooks/dspy_prompt_optimization.md
@@ -0,0 +1,282 @@
+---
+title: Prompt Optimization
+---
+
+
+:::tip[This is a notebook]
+
+
+
+:::
+
+
+
+
+
+
+# Optimizing LLM Workflows Using DSPy and Weave
+
+The [BIG-bench (Beyond the Imitation Game Benchmark)](https://github.com/google/BIG-bench) is a collaborative benchmark of more than 200 tasks intended to probe large language models and extrapolate their future capabilities. The [BIG-Bench Hard (BBH)](https://github.com/suzgunmirac/BIG-Bench-Hard) is a suite of the 23 most challenging BIG-Bench tasks, which can be quite difficult to solve with the current generation of language models.
+
+This tutorial demonstrates how we can improve the performance of our LLM workflow for the **causal judgement task** from the BIG-Bench Hard benchmark and evaluate our prompting strategies. We will use [DSPy](https://dspy-docs.vercel.app/) to implement our LLM workflow and optimize our prompting strategy. We will also use [Weave](../../introduction.md) to track our LLM workflow and evaluate our prompting strategies.
+
+## Installing the Dependencies
+
+We need the following libraries for this tutorial:
+
+- [DSPy](https://dspy-docs.vercel.app/) for building the LLM workflow and optimizing it.
+- [Weave](../../introduction.md) to track our LLM workflow and evaluate our prompting strategies.
+- [datasets](https://huggingface.co/docs/datasets/index) to access the Big-Bench Hard dataset from HuggingFace Hub.
+
+
+```python
+!pip install -qU dspy-ai weave datasets
+```
+
+Since we'll be using [OpenAI API](https://openai.com/index/openai-api/) as the LLM Vendor, we will also need an OpenAI API key. You can [sign up](https://platform.openai.com/signup) on the OpenAI platform to get your own API key.
+
+
+```python
+import os
+from getpass import getpass
+
+api_key = getpass("Enter your OpenAI API key: ")
+os.environ["OPENAI_API_KEY"] = api_key
+```
+
+## Enable Tracking using Weave
+
+Weave is currently integrated with DSPy, and including [`weave.init`](../../reference/python-sdk/weave/index.md) at the start of our code lets us automatically trace our DSPy functions which can be explored in the Weave UI. Check out the [Weave integration docs for DSPy](../../guides/integrations/dspy.md) to learn more.
+
+
+
+```python
+import weave
+
+weave.init(project_name="dspy-bigbench-hard")
+```
+
+In this tutorial, we use a metadata class inherited from [`weave.Object`](../../guides/tracking/objects.md) to manage our metadata.
+
+
+```python
+class Metadata(weave.Object):
+ dataset_address: str = "maveriq/bigbenchhard"
+ big_bench_hard_task: str = "causal_judgement"
+ num_train_examples: int = 50
+ openai_model: str = "gpt-3.5-turbo"
+ openai_max_tokens: int = 2048
+ max_bootstrapped_demos: int = 8
+ max_labeled_demos: int = 8
+
+
+metadata = Metadata()
+```
+
+:::tip Object Versioning
+The `Metadata` objects are automatically versioned and traced when functions consuming them are traced
+:::
+
+## Load the BIG-Bench Hard Dataset
+
+We will load this dataset from HuggingFace Hub, split it into training and validation sets, and [publish](../../guides/core-types/datasets.md) them on Weave. This will let us version the datasets and also use [`weave.Evaluation`](../../guides/core-types/evaluations.md) to evaluate our prompting strategy.
+
+
+```python
+import dspy
+from datasets import load_dataset
+
+
+@weave.op()
+def get_dataset(metadata: Metadata):
+    # load the BIG-Bench Hard dataset corresponding to the task from the Hugging Face Hub
+ dataset = load_dataset(metadata.dataset_address, metadata.big_bench_hard_task)[
+ "train"
+ ]
+
+ # create the training and validation datasets
+ rows = [{"question": data["input"], "answer": data["target"]} for data in dataset]
+ train_rows = rows[0 : metadata.num_train_examples]
+ val_rows = rows[metadata.num_train_examples :]
+
+ # create the training and validation examples consisting of `dspy.Example` objects
+ dspy_train_examples = [
+ dspy.Example(row).with_inputs("question") for row in train_rows
+ ]
+ dspy_val_examples = [dspy.Example(row).with_inputs("question") for row in val_rows]
+
+    # publish the datasets to Weave; this lets us version the data and use it for evaluation
+ weave.publish(
+ weave.Dataset(
+ name=f"bigbenchhard_{metadata.big_bench_hard_task}_train", rows=train_rows
+ )
+ )
+ weave.publish(
+ weave.Dataset(
+ name=f"bigbenchhard_{metadata.big_bench_hard_task}_val", rows=val_rows
+ )
+ )
+
+ return dspy_train_examples, dspy_val_examples
+
+
+dspy_train_examples, dspy_val_examples = get_dataset(metadata)
+```
+
+## The DSPy Program
+
+[DSPy](https://dspy-docs.vercel.app) is a framework that pushes building new LM pipelines away from manipulating free-form strings and closer to programming (composing modular operators to build text transformation graphs) where a compiler automatically generates optimized LM invocation strategies and prompts from a program.
+
+We will use the [`dspy.OpenAI`](https://dspy-docs.vercel.app/api/language_model_clients/OpenAI) abstraction to make LLM calls to [GPT3.5 Turbo](https://platform.openai.com/docs/models/gpt-3-5-turbo).
+
+
+```python
+system_prompt = """
+You are an expert in the field of causal reasoning. You are to analyze a given question carefully and answer in `Yes` or `No`.
+You should also provide a detailed explanation justifying your answer.
+"""
+
+llm = dspy.OpenAI(model="gpt-3.5-turbo", system_prompt=system_prompt)
+dspy.settings.configure(lm=llm)
+```
+
+### Writing the Causal Reasoning Signature
+
+A [signature](https://dspy-docs.vercel.app/docs/building-blocks/signatures) is a declarative specification of the input/output behavior of a [DSPy module](https://dspy-docs.vercel.app/docs/building-blocks/modules). Modules are task-adaptive components—akin to neural network layers—that abstract a particular text transformation.
+
+
+```python
+from pydantic import BaseModel, Field
+
+
+class Input(BaseModel):
+ query: str = Field(description="The question to be answered")
+
+
+class Output(BaseModel):
+ answer: str = Field(description="The answer for the question")
+ confidence: float = Field(
+ ge=0, le=1, description="The confidence score for the answer"
+ )
+ explanation: str = Field(description="The explanation for the answer")
+
+
+class QuestionAnswerSignature(dspy.Signature):
+ input: Input = dspy.InputField()
+ output: Output = dspy.OutputField()
+
+
+class CausalReasoningModule(dspy.Module):
+ def __init__(self):
+ self.prog = dspy.TypedPredictor(QuestionAnswerSignature)
+
+ @weave.op()
+ def forward(self, question) -> dict:
+ return self.prog(input=Input(query=question)).output.dict()
+```
+
+Let's test our LLM workflow, i.e., the `CausalReasoningModule` on an example from the causal reasoning subset of Big-Bench Hard.
+
+
+```python
+import rich
+
+baseline_module = CausalReasoningModule()
+
+prediction = baseline_module(dspy_train_examples[0]["question"])
+rich.print(prediction)
+```
+
+## Evaluating our DSPy Program
+
+Now that we have a baseline prompting strategy, let's evaluate it on our validation set using [`weave.Evaluation`](../../guides/core-types/evaluations.md) on a simple metric that matches the predicted answer with the ground truth. Weave will take each example, pass it through your application and score the output on multiple custom scoring functions. By doing this, you'll have a view of the performance of your application, and a rich UI to drill into individual outputs and scores.
+
+First, we need to create a simple Weave evaluation scoring function that tells whether the answer in the baseline module's output matches the ground truth answer. Scoring functions need to have a `model_output` keyword argument; the other arguments are user defined and are taken from the dataset examples, and only the keys matching those argument names are passed to the scorer.
+
+
+```python
+@weave.op()
+def weave_evaluation_scorer(answer: str, model_output: Output) -> dict:
+ return {"match": int(answer.lower() == model_output["answer"].lower())}
+```
+
+Next, we can simply define the evaluation and run it.
+
+
+```python
+validation_dataset = weave.ref(
+ f"bigbenchhard_{metadata.big_bench_hard_task}_val:v0"
+).get()
+
+evaluation = weave.Evaluation(
+ name="baseline_causal_reasoning_module",
+ dataset=validation_dataset,
+ scorers=[weave_evaluation_scorer],
+)
+
+await evaluation.evaluate(baseline_module.forward)
+```
+
+:::note
+If you're running from a python script, you can use the following code to run the evaluation:
+
+```python
+import asyncio
+asyncio.run(evaluation.evaluate(baseline_module.forward))
+```
+:::
+
+:::warning
+Running the evaluation on the causal reasoning dataset will cost approximately $0.24 in OpenAI credits.
+:::
+
+## Optimizing our DSPy Program
+
+Now that we have a baseline DSPy program, let us try to improve its performance for causal reasoning using a [DSPy teleprompter](https://dspy-docs.vercel.app/docs/building-blocks/optimizers) that can tune the parameters of a DSPy program to maximize the specified metrics. In this tutorial, we use the [BootstrapFewShot](https://dspy-docs.vercel.app/api/category/optimizers) teleprompter.
+
+
+```python
+from dspy.teleprompt import BootstrapFewShot
+
+
+@weave.op()
+def get_optimized_program(model: dspy.Module, metadata: Metadata) -> dspy.Module:
+ @weave.op()
+ def dspy_evaluation_metric(true, prediction, trace=None):
+ return prediction["answer"].lower() == true.answer.lower()
+
+ teleprompter = BootstrapFewShot(
+ metric=dspy_evaluation_metric,
+ max_bootstrapped_demos=metadata.max_bootstrapped_demos,
+ max_labeled_demos=metadata.max_labeled_demos,
+ )
+ return teleprompter.compile(model, trainset=dspy_train_examples)
+
+
+optimized_module = get_optimized_program(baseline_module, metadata)
+```
+
+:::warning
+Running the evaluation on the causal reasoning dataset will cost approximately $0.04 in OpenAI credits.
+:::
+
+Now that we have our optimized program (the optimized prompting strategy), let's evaluate it once again on our validation set and compare it with our baseline DSPy program.
+
+
+```python
+evaluation = weave.Evaluation(
+ name="optimized_causal_reasoning_module",
+ dataset=validation_dataset,
+ scorers=[weave_evaluation_scorer],
+)
+
+await evaluation.evaluate(optimized_module.forward)
+```
+
+Comparing the evaluation of the baseline program with the optimized one shows that the optimized program answers the causal reasoning questions with significantly more accuracy.
+
+## Conclusion
+
+In this tutorial, we learned how to use DSPy for prompt optimization alongside Weave for tracking and evaluation, comparing the original and optimized programs.
diff --git a/docs/docs/tutorial-tracing_2.md b/docs/docs/tutorial-tracing_2.md
index 108571cf650..da1980f1155 100644
--- a/docs/docs/tutorial-tracing_2.md
+++ b/docs/docs/tutorial-tracing_2.md
@@ -5,7 +5,6 @@ In the [Track LLM inputs & outputs](/quickstart) tutorial, the basics of trackin
In this tutorial you will learn how to:
- **Track data** as it flows though your application
- **Track metadata** at call time
-- **Export data** that was logged to Weave
## Tracking nested function calls
diff --git a/docs/notebooks/chain_of_density.ipynb b/docs/notebooks/chain_of_density.ipynb
new file mode 100644
index 00000000000..2fb6ecc3f45
--- /dev/null
+++ b/docs/notebooks/chain_of_density.ipynb
@@ -0,0 +1,542 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Summarization using Chain of Density\n",
+ "\n",
+ "Summarizing complex technical documents while preserving crucial details is a challenging task. The Chain of Density (CoD) summarization technique offers a solution by iteratively refining summaries to be more concise and information-dense. This guide demonstrates how to implement CoD using Weave for tracking and evaluating the application. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## What is Chain of Density Summarization?\n",
+ "\n",
+ "[![arXiv](https://img.shields.io/badge/arXiv-2309.04269-b31b1b.svg)](https://arxiv.org/abs/2309.04269)\n",
+ "\n",
+ "Chain of Density (CoD) is an iterative summarization technique that produces increasingly concise and information-dense summaries. It works by:\n",
+ "\n",
+ "1. Starting with an initial summary\n",
+ "2. Iteratively refining the summary, making it more concise while preserving key information\n",
+ "3. Increasing the density of entities and technical details with each iteration\n",
+ "\n",
+ "This approach is particularly useful for summarizing scientific papers or technical documents where preserving detailed information is crucial."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Why use Weave?\n",
+ "\n",
+ "In this tutorial, we'll use Weave to implement and evaluate a Chain of Density summarization pipeline for ArXiv papers. You'll learn how to:\n",
+ "\n",
+ "1. **Track your LLM pipeline**: Use Weave to automatically log inputs, outputs, and intermediate steps of your summarization process.\n",
+ "2. **Evaluate LLM outputs**: Create rigorous, apples-to-apples evaluations of your summaries using Weave's built-in tools.\n",
+ "3. **Build composable operations**: Combine and reuse Weave operations across different parts of your summarization pipeline.\n",
+ "4. **Integrate seamlessly**: Add Weave to your existing Python code with minimal overhead.\n",
+ "\n",
+ "By the end of this tutorial, you'll have created a CoD summarization pipeline that leverages Weave's capabilities for model serving, evaluation, and result tracking."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Set up the environment\n",
+ "\n",
+ "First, let's set up our environment and import the necessary libraries:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -qU anthropic weave pydantic requests PyPDF2 set-env-colab-kaggle-dotenv"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ">To get an Anthropic API key:\n",
+ "> 1. Sign up for an account at https://www.anthropic.com\n",
+ "> 2. Navigate to the API section in your account settings\n",
+ "> 3. Generate a new API key\n",
+ "> 4. Store the API key securely in your .env file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import io\n",
+ "import os\n",
+ "from datetime import datetime, timezone\n",
+ "\n",
+ "import anthropic\n",
+ "import requests\n",
+ "from pydantic import BaseModel\n",
+ "from PyPDF2 import PdfReader\n",
+ "from set_env import set_env\n",
+ "\n",
+ "import weave\n",
+ "\n",
+ "set_env(\"WANDB_API_KEY\")\n",
+ "set_env(\"ANTHROPIC_API_KEY\")\n",
+ "\n",
+ "weave.init(\"summarization-chain-of-density-cookbook\")\n",
+ "anthropic_client = anthropic.Anthropic(api_key=os.getenv(\"ANTHROPIC_API_KEY\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We're using Weave to track our experiment and Anthropic's Claude model for text generation. The `weave.init()` call sets up a new Weave project for our summarization task."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Define the ArxivPaper model\n",
+ "\n",
+ "We'll create a simple `ArxivPaper` class to represent our data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define ArxivPaper model\n",
+ "class ArxivPaper(BaseModel):\n",
+ " entry_id: str\n",
+ " updated: datetime\n",
+ " published: datetime\n",
+ " title: str\n",
+ " authors: list[str]\n",
+ " summary: str\n",
+ " pdf_url: str\n",
+ "\n",
+ "\n",
+ "# Create sample ArxivPaper\n",
+ "arxiv_paper = ArxivPaper(\n",
+ " entry_id=\"http://arxiv.org/abs/2406.04744v1\",\n",
+ " updated=datetime(2024, 6, 7, 8, 43, 7, tzinfo=timezone.utc),\n",
+ " published=datetime(2024, 6, 7, 8, 43, 7, tzinfo=timezone.utc),\n",
+ " title=\"CRAG -- Comprehensive RAG Benchmark\",\n",
+ " authors=[\"Xiao Yang\", \"Kai Sun\", \"Hao Xin\"], # Truncated for brevity\n",
+ " summary=\"Retrieval-Augmented Generation (RAG) has recently emerged as a promising solution...\", # Truncated\n",
+ " pdf_url=\"https://arxiv.org/pdf/2406.04744\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This class encapsulates the metadata and content of an ArXiv paper, which will be the input to our summarization pipeline."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load PDF content\n",
+ "\n",
+ "To work with the full paper content, we'll add a function to load and extract text from PDFs:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@weave.op()\n",
+ "def load_pdf(pdf_url: str) -> str:\n",
+ " # Download the PDF\n",
+ " response = requests.get(pdf_url)\n",
+ " pdf_file = io.BytesIO(response.content)\n",
+ "\n",
+ " # Read the PDF\n",
+ " pdf_reader = PdfReader(pdf_file)\n",
+ "\n",
+ " # Extract text from all pages\n",
+ " text = \"\"\n",
+ " for page in pdf_reader.pages:\n",
+ " text += page.extract_text()\n",
+ "\n",
+ " return text"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Implement Chain of Density summarization\n",
+ "\n",
+ "Now, let's implement the core CoD summarization logic using Weave operations:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Chain of Density Summarization\n",
+ "@weave.op()\n",
+ "def summarize_current_summary(\n",
+ " document: str,\n",
+ " instruction: str,\n",
+ " current_summary: str = \"\",\n",
+ " iteration: int = 1,\n",
+ " model: str = \"claude-3-sonnet-20240229\",\n",
+ "):\n",
+ " prompt = f\"\"\"\n",
+ " Document: {document}\n",
+ " Current summary: {current_summary}\n",
+ " Instruction to focus on: {instruction}\n",
+ " Iteration: {iteration}\n",
+ "\n",
+ " Generate an increasingly concise, entity-dense, and highly technical summary from the provided document that specifically addresses the given instruction.\n",
+ " \"\"\"\n",
+ " response = anthropic_client.messages.create(\n",
+ " model=model, max_tokens=4096, messages=[{\"role\": \"user\", \"content\": prompt}]\n",
+ " )\n",
+ " return response.content[0].text\n",
+ "\n",
+ "\n",
+ "@weave.op()\n",
+ "def iterative_density_summarization(\n",
+ " document: str,\n",
+ " instruction: str,\n",
+ " current_summary: str,\n",
+ " density_iterations: int,\n",
+ " model: str = \"claude-3-sonnet-20240229\",\n",
+ "):\n",
+ " iteration_summaries = []\n",
+ " for iteration in range(1, density_iterations + 1):\n",
+ " current_summary = summarize_current_summary(\n",
+ " document, instruction, current_summary, iteration, model\n",
+ " )\n",
+ " iteration_summaries.append(current_summary)\n",
+ " return current_summary, iteration_summaries\n",
+ "\n",
+ "\n",
+ "@weave.op()\n",
+ "def final_summary(\n",
+ " instruction: str, current_summary: str, model: str = \"claude-3-sonnet-20240229\"\n",
+ "):\n",
+ " prompt = f\"\"\"\n",
+ " Given this summary: {current_summary}\n",
+ " And this instruction to focus on: {instruction}\n",
+ " Create an extremely dense, final summary that captures all key technical information in the most concise form possible, while specifically addressing the given instruction.\n",
+ " \"\"\"\n",
+ " return (\n",
+ " anthropic_client.messages.create(\n",
+ " model=model, max_tokens=4096, messages=[{\"role\": \"user\", \"content\": prompt}]\n",
+ " )\n",
+ " .content[0]\n",
+ " .text\n",
+ " )\n",
+ "\n",
+ "\n",
+ "@weave.op()\n",
+ "def chain_of_density_summarization(\n",
+ " document: str,\n",
+ " instruction: str,\n",
+ " current_summary: str = \"\",\n",
+ " model: str = \"claude-3-sonnet-20240229\",\n",
+ " density_iterations: int = 2,\n",
+ "):\n",
+ " current_summary, iteration_summaries = iterative_density_summarization(\n",
+ " document, instruction, current_summary, density_iterations, model\n",
+ " )\n",
+ " final_summary_text = final_summary(instruction, current_summary, model)\n",
+ " return {\n",
+ " \"final_summary\": final_summary_text,\n",
+ " \"accumulated_summary\": current_summary,\n",
+ " \"iteration_summaries\": iteration_summaries,\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here's what each function does:\n",
+ "\n",
+ "- `summarize_current_summary`: Generates a single summary iteration based on the current state.\n",
+ "- `iterative_density_summarization`: Applies the CoD technique by calling `summarize_current_summary` multiple times.\n",
+ "- `chain_of_density_summarization`: Orchestrates the entire summarization process and returns the results.\n",
+ "\n",
+ "By using `@weave.op()` decorators, we ensure that Weave tracks the inputs, outputs, and execution of these functions.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create a Weave Model\n",
+ "\n",
+ "Now, let's wrap our summarization pipeline in a Weave Model:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Weave Model\n",
+ "class ArxivChainOfDensityPipeline(weave.Model):\n",
+ " model: str = \"claude-3-sonnet-20240229\"\n",
+ " density_iterations: int = 3\n",
+ "\n",
+ " @weave.op()\n",
+ " def predict(self, paper: ArxivPaper, instruction: str) -> dict:\n",
+ " text = load_pdf(paper[\"pdf_url\"])\n",
+ " result = chain_of_density_summarization(\n",
+ " text,\n",
+ " instruction,\n",
+ " model=self.model,\n",
+ " density_iterations=self.density_iterations,\n",
+ " )\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This `ArxivChainOfDensityPipeline` class encapsulates our summarization logic as a Weave Model, providing several key benefits:\n",
+ "\n",
+ "1. Automatic experiment tracking: Weave captures inputs, outputs, and parameters for each run of the model.\n",
+ "2. Versioning: Changes to the model's attributes or code are automatically versioned, creating a clear history of how your summarization pipeline evolves over time.\n",
+ "3. Reproducibility: The versioning and tracking make it easy to reproduce any previous result or configuration of your summarization pipeline.\n",
+ "4. Hyperparameter management: Model attributes (like `model` and `density_iterations`) are clearly defined and tracked across different runs, facilitating experimentation.\n",
+ "5. Integration with Weave ecosystem: Using `weave.Model` allows seamless integration with other Weave tools, such as evaluations and serving capabilities."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Implement evaluation metrics\n",
+ "\n",
+ "To assess the quality of our summaries, we'll implement simple evaluation metrics:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "\n",
+ "\n",
+ "@weave.op()\n",
+ "def evaluate_summary(\n",
+ " summary: str, instruction: str, model: str = \"claude-3-sonnet-20240229\"\n",
+ ") -> dict:\n",
+ " prompt = f\"\"\"\n",
+ " Summary: {summary}\n",
+ " Instruction: {instruction}\n",
+ "\n",
+ " Evaluate the summary based on the following criteria:\n",
+ " 1. Relevance (1-5): How well does the summary address the given instruction?\n",
+ " 2. Conciseness (1-5): How concise is the summary while retaining key information?\n",
+ " 3. Technical Accuracy (1-5): How accurately does the summary convey technical details?\n",
+ "\n",
+ " Your response MUST be in the following JSON format:\n",
+ " {{\n",
+ " \"relevance\": {{\n",
+ " \"score\": ,\n",
+ " \"explanation\": \"\"\n",
+ " }},\n",
+ " \"conciseness\": {{\n",
+ " \"score\": ,\n",
+ " \"explanation\": \"\"\n",
+ " }},\n",
+ " \"technical_accuracy\": {{\n",
+ " \"score\": ,\n",
+ " \"explanation\": \"\"\n",
+ " }}\n",
+ " }}\n",
+ "\n",
+ " Ensure that the scores are integers between 1 and 5, and that the explanations are concise.\n",
+ " \"\"\"\n",
+ " response = anthropic_client.messages.create(\n",
+ " model=model, max_tokens=1000, messages=[{\"role\": \"user\", \"content\": prompt}]\n",
+ " )\n",
+ " print(response.content[0].text)\n",
+ "\n",
+ " eval_dict = json.loads(response.content[0].text)\n",
+ "\n",
+ " return {\n",
+ " \"relevance\": eval_dict[\"relevance\"][\"score\"],\n",
+ " \"conciseness\": eval_dict[\"conciseness\"][\"score\"],\n",
+ " \"technical_accuracy\": eval_dict[\"technical_accuracy\"][\"score\"],\n",
+ " \"average_score\": sum(eval_dict[k][\"score\"] for k in eval_dict) / 3,\n",
+ " \"evaluation_text\": response.content[0].text,\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "These evaluation functions use the Claude model to assess the quality of the generated summaries based on relevance, conciseness, and technical accuracy."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create a Weave Dataset and run evaluation\n",
+ "\n",
+ "To evaluate our pipeline, we'll create a Weave Dataset and run an evaluation:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a Weave Dataset\n",
+ "dataset = weave.Dataset(\n",
+ " name=\"arxiv_papers\",\n",
+ " rows=[\n",
+ " {\n",
+ " \"paper\": arxiv_paper,\n",
+ " \"instruction\": \"What was the approach to experimenting with different data mixtures?\",\n",
+ " },\n",
+ " ],\n",
+ ")\n",
+ "\n",
+ "weave.publish(dataset)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For our evaluation, we'll use an LLM-as-a-judge approach. This technique involves using a language model to assess the quality of outputs generated by another model or system. It leverages the LLM's understanding and reasoning capabilities to provide nuanced evaluations, especially for tasks where traditional metrics may fall short."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[![arXiv](https://img.shields.io/badge/arXiv-2306.05685-b31b1b.svg)](https://arxiv.org/abs/2306.05685)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define the scorer function\n",
+ "@weave.op()\n",
+ "def quality_scorer(instruction: str, model_output: dict) -> dict:\n",
+ " result = evaluate_summary(model_output[\"final_summary\"], instruction)\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run evaluation\n",
+ "evaluation = weave.Evaluation(dataset=dataset, scorers=[quality_scorer])\n",
+ "arxiv_chain_of_density_pipeline = ArxivChainOfDensityPipeline()\n",
+ "results = await evaluation.evaluate(arxiv_chain_of_density_pipeline)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This code creates a dataset with our sample ArXiv paper, defines a quality scorer, and runs an evaluation of our summarization pipeline."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Conclusion\n",
+ "\n",
+ "In this example, we've demonstrated how to implement a Chain of Density summarization pipeline for ArXiv papers using Weave. We've shown how to:\n",
+ "\n",
+ "1. Create Weave operations for each step of the summarization process\n",
+ "2. Wrap the pipeline in a Weave Model for easy tracking and evaluation\n",
+ "3. Implement custom evaluation metrics using Weave operations\n",
+ "4. Create a dataset and run an evaluation of the pipeline\n",
+ "\n",
+ "Weave's seamless integration allows us to track inputs, outputs, and intermediate steps throughout the summarization process, making it easier to debug, optimize, and evaluate our LLM application.\n",
+ "You can extend this example to handle larger datasets, implement more sophisticated evaluation metrics, or integrate with other LLM workflows.\n",
+ "\n",
+ "\n",
+ " View Full Report on W&B\n",
+ ""
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/notebooks/dspy_prompt_optimization.ipynb b/docs/notebooks/dspy_prompt_optimization.ipynb
index 94c6b485e15..573aaf7085a 100644
--- a/docs/notebooks/dspy_prompt_optimization.ipynb
+++ b/docs/notebooks/dspy_prompt_optimization.ipynb
@@ -6,15 +6,23 @@
"source": [
"\n",
"\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
"# Optimizing LLM Workflows Using DSPy and Weave\n",
"\n",
"The [BIG-bench (Beyond the Imitation Game Benchmark)](https://github.com/google/BIG-bench) is a collaborative benchmark intended to probe large language models and extrapolate their future capabilities consisting of more than 200 tasks. The [BIG-Bench Hard (BBH)](https://github.com/suzgunmirac/BIG-Bench-Hard) is a suite of 23 most challenging BIG-Bench tasks that can be quite difficult to be solved using the current generation of language models.\n",
"\n",
- "This tutorial demonstrates how we can improve the performance of our LLM workflow implemented on the **causal judgement task** from the BIG-bench Hard benchmark and evaluate our prompting strategies. We will use [DSPy](https://dspy-docs.vercel.app/) for implementing our LLM workflow and optimizing our prompting strategy. We will also use [Weave](../docs/introduction.md) to track our LLM workflow and evaluate our prompting strategies."
+    "This tutorial demonstrates how we can improve the performance of our LLM workflow for the **causal judgement task** from the BIG-Bench Hard benchmark and evaluate our prompting strategies. We will use [DSPy](https://dspy-docs.vercel.app/) to implement our LLM workflow and optimize our prompting strategy. We will also use [Weave](../../introduction.md) to track our LLM workflow and evaluate our prompting strategies."
]
},
{
@@ -26,7 +34,7 @@
"We need the following libraries for this tutorial:\n",
"\n",
"- [DSPy](https://dspy-docs.vercel.app/) for building the LLM workflow and optimizing it.\n",
- "- [Weave](../introduction.md) to track our LLM workflow and evaluate our prompting strategies.\n",
+ "- [Weave](../../introduction.md) to track our LLM workflow and evaluate our prompting strategies.\n",
"- [datasets](https://huggingface.co/docs/datasets/index) to access the Big-Bench Hard dataset from HuggingFace Hub."
]
},
@@ -65,7 +73,7 @@
"source": [
"## Enable Tracking using Weave\n",
"\n",
- "Weave is currently integrated with DSPy, and including [`weave.init`](../docs/reference/python-sdk/weave/index.md) at the start of our code lets us automatically trace our DSPy functions which can be explored in the Weave UI. Check out the [Weave integration docs for DSPy](../docs/guides/integrations/dspy.md) to learn more."
+ "Weave is currently integrated with DSPy, and including [`weave.init`](../../reference/python-sdk/weave/index.md) at the start of our code lets us automatically trace our DSPy functions which can be explored in the Weave UI. Check out the [Weave integration docs for DSPy](../../guides/integrations/dspy.md) to learn more.\n"
]
},
{
@@ -83,7 +91,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "In this tutorial, we use a metadata class inherited from [`weave.Model`](../docs/guides/core-types/models.md) to manage our metadata."
+ "In this tutorial, we use a metadata class inherited from [`weave.Object`](../../guides/tracking/objects.md) to manage our metadata."
]
},
{
@@ -92,7 +100,7 @@
"metadata": {},
"outputs": [],
"source": [
- "class Metadata(weave.Model):\n",
+ "class Metadata(weave.Object):\n",
" dataset_address: str = \"maveriq/bigbenchhard\"\n",
" big_bench_hard_task: str = \"causal_judgement\"\n",
" num_train_examples: int = 50\n",
@@ -109,9 +117,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "| ![](../static/img/dspy_prompt_optimiztion/metadata.gif) |\n",
- "|---|\n",
- "| The `Metadata` objects are automatically versioned and traced when functions consuming them are traced |"
+ ":::tip Object Versioning\n",
+ "The `Metadata` objects are automatically versioned and traced when functions consuming them are traced\n",
+ ":::"
]
},
{
@@ -120,7 +128,7 @@
"source": [
"## Load the BIG-Bench Hard Dataset\n",
"\n",
- "We will load this dataset from HuggingFace Hub, split into training and validation sets, and [publish](../docs/guides/core-types/datasets.md) them on Weave, this will let us version the datasets, and also use [`weave.Evaluation`](../docs/guides/core-types/evaluations.md) to evaluate our prompting strategy."
+    "We will load this dataset from HuggingFace Hub, split it into training and validation sets, and [publish](../../guides/core-types/datasets.md) them on Weave. This will let us version the datasets and also use [`weave.Evaluation`](../../guides/core-types/evaluations.md) to evaluate our prompting strategy."
]
},
{
@@ -169,15 +177,6 @@
"dspy_train_examples, dspy_val_examples = get_dataset(metadata)"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "| ![](../static/img/dspy_prompt_optimiztion/datasets.gif) |\n",
- "|---|\n",
- "| The datasets, once published, can be explored in the Weave UI |"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -269,22 +268,13 @@
"rich.print(prediction)"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "| ![](../static/img/dspy_prompt_optimiztion/dspy_module_trace.gif) |\n",
- "|---|\n",
- "| Here's how you can explore the traces of the `CausalReasoningModule` in the Weave UI |"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Evaluating our DSPy Program\n",
"\n",
- "Now that we have a baseline prompting strategy, let's evaluate it on our validation set using [`weave.Evaluation`](../docs/guides/core-types/evaluations.md) on a simple metric that matches the predicted answer with the ground truth. Weave will take each example, pass it through your application and score the output on multiple custom scoring functions. By doing this, you'll have a view of the performance of your application, and a rich UI to drill into individual outputs and scores.\n",
+ "Now that we have a baseline prompting strategy, let's evaluate it on our validation set using [`weave.Evaluation`](../../guides/core-types/evaluations.md) on a simple metric that matches the predicted answer with the ground truth. Weave will take each example, pass it through your application and score the output on multiple custom scoring functions. By doing this, you'll have a view of the performance of your application, and a rich UI to drill into individual outputs and scores.\n",
"\n",
"First, we need to create a simple weave evaluation scoring function that tells whether the answer from the baseline module's output is the same as the ground truth answer or not. Scoring functions need to have a `model_output` keyword argument, but the other arguments are user defined and are taken from the dataset examples. It will only take the necessary keys by using a dictionary key based on the argument name."
]
@@ -387,10 +377,6 @@
"Running the evaluation causal reasoning dataset will cost approximately $0.04 in OpenAI credits.\n",
":::\n",
"\n",
- "| ![](../static/img/dspy_prompt_optimiztion/dspy_compile.png) |\n",
- "|---|\n",
- "| You can explore the traces of the optimization process in the Weave UI. |\n",
- "\n",
"Now that we have our optimized program (the optimized prompting strategy), let's evaluate it once again on our validation set and compare it with our baseline DSPy program."
]
},
@@ -413,15 +399,17 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "| ![](../static/img/dspy_prompt_optimiztion/eval_comparison.gif) |\n",
- "|---|\n",
- "| Comparing the evalution of the baseline program with the optimized one shows that the optimized program answers the causal reasoning questions with siginificantly more accuracy. |"
+    "Comparing the evaluation of the baseline program with the optimized one shows that the optimized program answers the causal reasoning questions with significantly more accuracy."
]
},
{
"cell_type": "markdown",
"metadata": {},
- "source": []
+ "source": [
+ "## Conclusion\n",
+ "\n",
+    "In this tutorial, we learned how to use DSPy for prompt optimization alongside Weave for tracking and evaluation, comparing the original and optimized programs."
+ ]
}
],
"metadata": {
diff --git a/docs/scripts/generate_notebooks.py b/docs/scripts/generate_notebooks.py
index ad3d8a10277..c3421dc8f73 100644
--- a/docs/scripts/generate_notebooks.py
+++ b/docs/scripts/generate_notebooks.py
@@ -54,7 +54,7 @@ def export_all_notebooks_in_primary_dir():
def main():
export_all_notebooks_in_primary_dir()
export_notebook(
- "./intro_notebook.ipynb", "./docs/reference/gen_notebooks/intro_notebook.md"
+ "./intro_notebook.ipynb", "./docs/reference/gen_notebooks/01-intro_notebook.md"
)
diff --git a/docs/static/img/dspy_prompt_optimiztion/datasets.gif b/docs/static/img/dspy_prompt_optimiztion/datasets.gif
deleted file mode 100644
index 239c7c74767..00000000000
Binary files a/docs/static/img/dspy_prompt_optimiztion/datasets.gif and /dev/null differ
diff --git a/docs/static/img/dspy_prompt_optimiztion/dspy_compile.png b/docs/static/img/dspy_prompt_optimiztion/dspy_compile.png
deleted file mode 100644
index 1afff4bf60d..00000000000
Binary files a/docs/static/img/dspy_prompt_optimiztion/dspy_compile.png and /dev/null differ
diff --git a/docs/static/img/dspy_prompt_optimiztion/dspy_module_trace.gif b/docs/static/img/dspy_prompt_optimiztion/dspy_module_trace.gif
deleted file mode 100644
index 970da65a1dd..00000000000
Binary files a/docs/static/img/dspy_prompt_optimiztion/dspy_module_trace.gif and /dev/null differ
diff --git a/docs/static/img/dspy_prompt_optimiztion/eval_comparison.gif b/docs/static/img/dspy_prompt_optimiztion/eval_comparison.gif
deleted file mode 100644
index 2faccf15427..00000000000
Binary files a/docs/static/img/dspy_prompt_optimiztion/eval_comparison.gif and /dev/null differ
diff --git a/docs/static/img/dspy_prompt_optimiztion/metadata.gif b/docs/static/img/dspy_prompt_optimiztion/metadata.gif
deleted file mode 100644
index 5a00a7375f2..00000000000
Binary files a/docs/static/img/dspy_prompt_optimiztion/metadata.gif and /dev/null differ
diff --git a/requirements.txt b/requirements.txt
index 1729b6cb1ed..e8cb5513f35 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,6 +42,9 @@ numpy<2.0.0
# Segment logging
analytics-python>=1.2.9
+# Used for ISO date parsing.
+python-dateutil>=2.8.2
+
# Used for version parsing in integrations.
packaging>=21.0
diff --git a/weave-js/src/common/components/elements/LegacyWBIcon.tsx b/weave-js/src/common/components/elements/LegacyWBIcon.tsx
index b1fce5a4895..fa440a9ba03 100644
--- a/weave-js/src/common/components/elements/LegacyWBIcon.tsx
+++ b/weave-js/src/common/components/elements/LegacyWBIcon.tsx
@@ -26,6 +26,10 @@ export interface LegacyWBIconProps {
style?: any;
'data-test'?: any;
+
+ role?: string;
+ ariaHidden?: string;
+ ariaLabel?: string;
}
const LegacyWBIconComp = React.forwardRef(
@@ -42,6 +46,10 @@ const LegacyWBIconComp = React.forwardRef(
onMouseLeave,
style,
'data-test': dataTest,
+ role,
+ title,
+ ariaHidden,
+ ariaLabel,
},
ref
) => {
@@ -59,6 +67,10 @@ const LegacyWBIconComp = React.forwardRef(
onMouseLeave,
style,
'data-test': dataTest,
+ role,
+ title,
+ 'aria-hidden': ariaHidden,
+ 'aria-label': ariaLabel,
};
if (ref == null) {
return ;
diff --git a/weave-js/src/components/FancyPage/FancyPageSidebarSection.tsx b/weave-js/src/components/FancyPage/FancyPageSidebarSection.tsx
index 4603188d2f0..709c3ccb781 100644
--- a/weave-js/src/components/FancyPage/FancyPageSidebarSection.tsx
+++ b/weave-js/src/components/FancyPage/FancyPageSidebarSection.tsx
@@ -136,7 +136,11 @@ const FancyPageSidebarSection = (props: FancyPageSidebarSectionProps) => {
}}>
-
+ {item.name}
@@ -159,7 +163,11 @@ const FancyPageSidebarSection = (props: FancyPageSidebarSectionProps) => {
const button = (
-
+ {item.name}
diff --git a/weave-js/src/components/LinearProgress.tsx b/weave-js/src/components/LinearProgress.tsx
new file mode 100644
index 00000000000..6440fcba06b
--- /dev/null
+++ b/weave-js/src/components/LinearProgress.tsx
@@ -0,0 +1,25 @@
+/**
+ * Styled linear progress bar.
+ */
+
+import MuiLinearProgress, {
+ LinearProgressProps as MuiLinearProgressProps,
+} from '@mui/material/LinearProgress';
+import React from 'react';
+
+import * as Colors from '../common/css/color.styles';
+
+export const LinearProgress = (props: MuiLinearProgressProps) => {
+ return (
+
+ );
+};
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse2/Browse2OpDefCode.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse2/Browse2OpDefCode.tsx
index 86d25d00e0e..38aa07823d8 100644
--- a/weave-js/src/components/PagePanelComponents/Home/Browse2/Browse2OpDefCode.tsx
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse2/Browse2OpDefCode.tsx
@@ -3,6 +3,7 @@ import Box from '@mui/material/Box';
import {Loading} from '@wandb/weave/components/Loading';
import React, {FC} from 'react';
+import {Alert} from '../../../Alert';
import {useWFHooks} from '../Browse3/pages/wfReactInterface/context';
export const Browse2OpDefCode: FC<{uri: string; maxRowsInView?: number}> = ({
@@ -25,6 +26,17 @@ export const Browse2OpDefCode: FC<{uri: string; maxRowsInView?: number}> = ({
);
}
+ if (text.result == null) {
+ return (
+
+ No code found for this operation
+
+ );
+ }
+
const inner = (
= ({title}) => {
+ const close = useClosePeek();
+ const {isPeeking} = useContext(WeaveflowPeekContext);
+ return (
+