diff --git a/tests/scorers/test_coherence_scorer.py b/tests/scorers/test_coherence_scorer.py
index 36770ef7b5e..7bf7439fdfd 100644
--- a/tests/scorers/test_coherence_scorer.py
+++ b/tests/scorers/test_coherence_scorer.py
@@ -1,4 +1,5 @@
 import pytest
+from unittest.mock import MagicMock
 
 import weave
 from weave.scorers.coherence_scorer import CoherenceScorer
@@ -7,26 +8,33 @@
 
 @pytest.fixture
 def coherence_scorer(monkeypatch):
+    # Mock model loading
+    mock_model = MagicMock()
+    mock_tokenizer = MagicMock()
+    monkeypatch.setattr("transformers.AutoModelForSequenceClassification.from_pretrained", lambda *args, **kwargs: mock_model)
+    monkeypatch.setattr("transformers.AutoTokenizer.from_pretrained", lambda *args, **kwargs: mock_tokenizer)
+
+    # Mock wandb login and project
+    monkeypatch.setattr("wandb.login", lambda *args, **kwargs: True)
+    mock_project = MagicMock()
+    monkeypatch.setattr("wandb.Api", lambda: MagicMock(project=lambda *args: mock_project))
+
     scorer = CoherenceScorer(
-        model_name="wandb/coherence_scorer",
+        model_name_or_path="wandb/coherence_scorer",
         device="cpu",
+        name="test-coherence",
+        description="Test coherence scorer",
+        column_map={"output": "text"}
     )
 
     def mock_pipeline(*args, **kwargs):
         def inner(inputs):
-            if "incoherent" in inputs["text_pair"] or "incoherent" in inputs["text"]:
-                return {
-                    "label": "incoherent",
-                    "score": 0.2,
-                }
-            return {
-                "label": "coherent",
-                "score": 0.95,
-            }
-
+            if "incoherent" in str(inputs.get("text_pair", "")) or "incoherent" in str(inputs.get("text", "")):
+                return {"label": "Completely Incoherent", "score": 0.2}
+            return {"label": "Perfectly Coherent", "score": 0.95}
         return inner
 
-    monkeypatch.setattr(scorer, "_classifier", mock_pipeline())
+    monkeypatch.setattr("transformers.pipeline", mock_pipeline)
     return scorer
 
 
diff --git a/tests/scorers/test_context_relevance_scorer.py b/tests/scorers/test_context_relevance_scorer.py
index d58d4002b3d..5f84122a0f5 100644
--- a/tests/scorers/test_context_relevance_scorer.py
+++ b/tests/scorers/test_context_relevance_scorer.py
@@ -1,13 +1,34 @@
 """Tests for the Context Relevance Scorer."""
 import pytest
+from unittest.mock import MagicMock
 from weave.scorers.context_relevance_scorer import ContextRelevanceScorer
 from tests.scorers.test_utils import generate_large_text, generate_context_and_output
 
 
 @pytest.fixture
-def context_relevance_scorer():
+def context_relevance_scorer(monkeypatch):
     """Create a context relevance scorer for testing."""
-    return ContextRelevanceScorer()
+    # Mock wandb login and project
+    monkeypatch.setattr("wandb.login", lambda *args, **kwargs: True)
+    mock_project = MagicMock()
+    monkeypatch.setattr("wandb.Api", lambda: MagicMock(project=lambda *args: mock_project))
+
+    scorer = ContextRelevanceScorer(
+        model_name_or_path="wandb/relevance_scorer",
+        device="cpu",
+        name="test-context-relevance",
+        description="Test context relevance scorer",
+        column_map={"output": "text", "context": "context"}
+    )
+
+    def mock_pipeline(*args, **kwargs):
+        def inner(text, **kwargs):
+            return [{"generated_text": '{"relevance": 4, "relevant": true}'}]
+        return inner
+
+    monkeypatch.setattr("transformers.pipeline", mock_pipeline)
+    monkeypatch.setattr(scorer, "_classifier", mock_pipeline())
+    return scorer
 
 
 @pytest.mark.asyncio
diff --git a/tests/scorers/test_faithfulness_scorer.py b/tests/scorers/test_faithfulness_scorer.py
new file mode 100644
index 00000000000..a375c3c99d5
--- /dev/null
+++ b/tests/scorers/test_faithfulness_scorer.py
@@ -0,0 +1,64 @@
+import pytest
+from unittest.mock import MagicMock, patch
+
+from weave.scorers.faithfulness_scorer import FaithfulnessScorer
+from tests.scorers.test_utils import generate_large_text
+
+
+@pytest.fixture
+def faithfulness_scorer(monkeypatch):
+    # Mock model loading
+    mock_model = MagicMock()
+    mock_tokenizer = MagicMock()
+    monkeypatch.setattr("transformers.AutoModelForSequenceClassification.from_pretrained", lambda *args, **kwargs: mock_model)
+    monkeypatch.setattr("transformers.AutoTokenizer.from_pretrained", lambda *args, **kwargs: mock_tokenizer)
+
+    # Mock wandb login and project
+    monkeypatch.setattr("wandb.login", lambda *args, **kwargs: True)
+    mock_project = MagicMock()
+    monkeypatch.setattr("wandb.Api", lambda: MagicMock(project=lambda *args: mock_project))
+
+    scorer = FaithfulnessScorer(
+        model_name_or_path="wandb/faithfulness_scorer",
+        device="cpu",
+        name="test-faithfulness",
+        description="Test faithfulness scorer",
+        column_map={"output": "text", "context": "context"}
+    )
+    return scorer
+
+
+@pytest.mark.asyncio
+async def test_faithfulness_scorer_inheritance():
+    from weave.scorers.hallucination_scorer import HallucinationScorer
+
+    scorer = FaithfulnessScorer(
+        model_name_or_path="wandb/faithfulness_scorer",
+        device="cpu",
+        name="test-faithfulness",
+        description="Test faithfulness scorer",
+        column_map={"output": "text", "context": "context"}
+    )
+    assert isinstance(scorer, HallucinationScorer)
+
+
+@pytest.mark.asyncio
+async def test_faithfulness_scorer_large_input(faithfulness_scorer):
+    large_text = generate_large_text()
+    context = "This is the context for testing."
+
+    result = await faithfulness_scorer.score(large_text, context=context)
+
+    assert isinstance(result, dict)
+    assert "extras" in result
+    assert "score" in result["extras"]
+    assert isinstance(result["extras"]["score"], float)
+    assert 0 <= result["extras"]["score"] <= 1
+
+
+@pytest.mark.asyncio
+async def test_faithfulness_scorer_error_handling(faithfulness_scorer):
+    with pytest.raises(ValueError):
+        await faithfulness_scorer.score("", context="Some context")
+    with pytest.raises(ValueError):
+        await faithfulness_scorer.score("Some response", context="")
diff --git a/tests/scorers/test_hallucination_scorer.py b/tests/scorers/test_hallucination_scorer.py
index 080fca5bd1a..4227c52ad36 100644
--- a/tests/scorers/test_hallucination_scorer.py
+++ b/tests/scorers/test_hallucination_scorer.py
@@ -1,5 +1,6 @@
 import pytest
 from openai import OpenAI
+from unittest.mock import MagicMock, patch
 
 import weave
 from weave.scorers import (
@@ -44,13 +45,41 @@ def hallucination_scorer(mock_create):
 
 
 @pytest.fixture
-def hallucination_scorer_v2(mock_create):
-    return HallucinationScorer()
+def hallucination_scorer_v2(monkeypatch):
+    # Mock wandb login and project
+    monkeypatch.setattr("wandb.login", lambda *args, **kwargs: True)
+    mock_project = MagicMock()
+    monkeypatch.setattr("wandb.Api", lambda: MagicMock(project=lambda *args: mock_project))
+
+    scorer = HallucinationScorer(
+        model_name_or_path="wandb/hallucination_scorer",
+        device="cpu",
+        name="test-hallucination",
+        description="Test hallucination scorer",
+        column_map={"output": "text"}
+    )
+    monkeypatch.setattr(scorer, "_model", MagicMock())
+    monkeypatch.setattr(scorer, "_tokenizer", MagicMock())
+    return scorer
 
 
 @pytest.fixture
-def faithfulness_scorer(mock_create):
-    return FaithfulnessScorer()
+def faithfulness_scorer(monkeypatch):
+    # Mock wandb login and project
+    monkeypatch.setattr("wandb.login", lambda *args, **kwargs: True)
+    mock_project = MagicMock()
+    monkeypatch.setattr("wandb.Api", lambda: MagicMock(project=lambda *args: mock_project))
+
+    scorer = FaithfulnessScorer(
+        model_name_or_path="wandb/faithfulness_scorer",
+        device="cpu",
+        name="test-faithfulness",
+        description="Test faithfulness scorer",
+        column_map={"output": "text"}
+    )
+    monkeypatch.setattr(scorer, "_model", MagicMock())
+    monkeypatch.setattr(scorer, "_tokenizer", MagicMock())
+    return scorer
 
 
 def test_hallucination_scorer_score(hallucination_scorer, mock_create):
diff --git a/tests/scorers/test_llamaguard_scorer.py b/tests/scorers/test_llamaguard_scorer.py
index c799b9d41e7..8348bf1fb1f 100644
--- a/tests/scorers/test_llamaguard_scorer.py
+++ b/tests/scorers/test_llamaguard_scorer.py
@@ -1,5 +1,7 @@
 import pytest
 from transformers import AutoTokenizer
+from unittest.mock import MagicMock
+import wandb
 
 import weave
 from weave.scorers import LlamaGuard
@@ -10,15 +12,28 @@
 
 @pytest.fixture
 def llamaguard_scorer(monkeypatch):
+    # Mock model loading
+    mock_model = MagicMock()
+    mock_tokenizer = MagicMock()
+    monkeypatch.setattr("transformers.AutoModelForCausalLM.from_pretrained", lambda *args, **kwargs: mock_model)
+    monkeypatch.setattr("transformers.AutoTokenizer.from_pretrained", lambda *args, **kwargs: mock_tokenizer)
+
+    # Mock wandb login and project
+    monkeypatch.setattr("wandb.login", lambda *args, **kwargs: True)
+    mock_project = MagicMock()
+    monkeypatch.setattr("wandb.Api", lambda: MagicMock(project=lambda *args: mock_project))
+
     scorer = LlamaGuard(
         model_name=_TINY_MODEL_NAME,
         device="cpu",
+        name="test-llamaguard",
+        description="Test LlamaGuard scorer",
+        column_map={"output": "text"}
     )
-    scorer._tokenizer = AutoTokenizer.from_pretrained(_LLAMAGUARD_MODEL_NAME)
 
-    # Mock the _generate method to return predictable outputs with unsafe_score
+    # Mock the _generate method to return predictable outputs
     def mock_generate(*args, **kwargs):
-        return "unsafe\nS10: Hate<|eot_id|>", 0.85  # Added mock unsafe_score
+        return "unsafe\nS10: Hate<|eot_id|>", 0.85
 
     monkeypatch.setattr(scorer, "_generate", mock_generate)
     return scorer
@@ -26,18 +41,18 @@ def mock_generate(*args, **kwargs):
 
 def test_llamaguard_postprocess(llamaguard_scorer):
     # Test safe content
-    safe_output = ("safe", 0.1)  # Added mock unsafe_score
+    safe_output = ("safe", 0.1)
     result = llamaguard_scorer.postprocess(*safe_output)
     assert result["safe"]
     assert result["category"] is None
-    assert result["unsafe_score"] == 0.1  # Test unsafe_score
+    assert result["unsafe_score"] == 0.1
 
     # Test unsafe content with category
-    unsafe_output = ("unsafe\nS5<|eot_id|>", 0.9)  # Added mock unsafe_score
+    unsafe_output = ("unsafe\nS5<|eot_id|>", 0.9)
     result = llamaguard_scorer.postprocess(*unsafe_output)
     assert not result["safe"]
     assert result["category"] == "S5: Defamation"
-    assert result["unsafe_score"] == 0.9  # Test unsafe_score
+    assert result["unsafe_score"] == 0.9
 
 
 @pytest.mark.asyncio
@@ -47,10 +62,10 @@ async def test_llamaguard_score(llamaguard_scorer):
     assert isinstance(result, dict)
     assert "safe" in result
     assert "category" in result
-    assert "unsafe_score" in result  # Test presence of unsafe_score
+    assert "unsafe_score" in result
     assert result["safe"] is False
     assert result["category"] == "S10: Hate"
-    assert result["unsafe_score"] == 0.85  # Test unsafe_score matches mock value
+    assert result["unsafe_score"] == 0.85
 
 
 @pytest.mark.asyncio
diff --git a/tests/scorers/test_llm_integrations.py b/tests/scorers/test_llm_integrations.py
index c93c98d046a..63f811683c8 100644
--- a/tests/scorers/test_llm_integrations.py
+++ b/tests/scorers/test_llm_integrations.py
@@ -1,4 +1,5 @@
 import os
+from unittest.mock import MagicMock
 
 import pytest
 
@@ -65,18 +66,34 @@ def get_client_and_model(provider, model):
 
 
 @pytest.mark.parametrize("provider,model", test_params, ids=lambda p: f"{p[0]}:{p[1]}")
-def test_summarization_scorer_evaluate_summary(provider, model):
-    client, model_id = get_client_and_model(provider, model)
+def test_summarization_scorer_evaluate_summary(provider, model, monkeypatch):
+    # Mock instructor client
+    mock_instructor = MagicMock()
+    mock_instructor.from_openai.return_value = MagicMock()
+    monkeypatch.setattr("instructor.patch", mock_instructor)
+
+    # Mock the client creation and response
+    mock_client = MagicMock()
+    mock_response = MagicMock()
+    mock_response.choices = [MagicMock(message=MagicMock(content="Mocked response"))]
+    mock_client.chat.completions.create.return_value = mock_response
+
+    monkeypatch.setattr("openai.OpenAI", lambda *args, **kwargs: mock_client)
+    monkeypatch.setattr("anthropic.Anthropic", lambda *args, **kwargs: mock_client)
+    monkeypatch.setattr("mistralai.Mistral", lambda *args, **kwargs: mock_client)
+    monkeypatch.setattr("google.generativeai.GenerativeModel", lambda *args, **kwargs: mock_client)
 
+    client, model_id = get_client_and_model(provider, model)
     summarization_scorer = SummarizationScorer(
         client=client,
         model_id=model_id,
         temperature=0.7,
         max_tokens=1024,
+        name="test-summarization",
+        description="Test summarization scorer",
+        column_map={"output": "text", "input": "text"}
     )
     input_text = "This is the original text."
     summary_text = "This is the summary."
-    result = summarization_scorer.evaluate_summary(
-        input=input_text, summary=summary_text
-    )
+    result = summarization_scorer.evaluate_summary(input=input_text, summary=summary_text)
     assert isinstance(result, SummarizationEvaluationResponse)
diff --git a/tests/scorers/test_llm_utils.py b/tests/scorers/test_llm_utils.py
index 0d6f4eed06a..9cabc89f1de 100644
--- a/tests/scorers/test_llm_utils.py
+++ b/tests/scorers/test_llm_utils.py
@@ -1,4 +1,5 @@
 import pytest
+from unittest.mock import MagicMock
 
 from weave.scorers.llm_utils import (
     embed,
@@ -22,7 +23,7 @@ def create(self, *args, **kwargs):
 
 class MockOpenAIEmbeddings:
     def create(self, model, input, **kwargs):
-        return {"data": [{"embedding": [0.1, 0.2, 0.3]} for _ in input]}
+        return type('Response', (), {'data': [type('Embedding', (), {'embedding': [0.1, 0.2, 0.3]}) for _ in input]})()
 
 
 class MockSyncOpenAI:
@@ -39,7 +40,7 @@ async def create(self, *args, **kwargs):
 
 class MockAsyncOpenAIEmbeddings:
     async def create(self, model, input, **kwargs):
-        return {"data": [{"embedding": [0.4, 0.5, 0.6]} for _ in input]}
+        return type('Response', (), {'data': [type('Embedding', (), {'embedding': [0.4, 0.5, 0.6]}) for _ in input]})()
 
 
 class MockAsyncOpenAI:
@@ -82,21 +83,29 @@ def test_is_sync_client(sync_client, async_client):
 
 
 # Test to ensure instructor_client returns a valid instructor client for synchronous clients
-def test_instructor_client_sync(sync_client):
-    try:
-        client = instructor_client(sync_client)
-    except Exception as e:
-        pytest.fail(f"instructor_client raised an exception for sync_client: {e}")
+def test_instructor_client_sync(sync_client, monkeypatch):
+    # Mock instructor client
+    mock_instructor = MagicMock()
+    mock_instructor_client = MagicMock()
+    mock_instructor.from_openai.return_value = mock_instructor_client
+    monkeypatch.setattr("instructor.patch", mock_instructor)
+
+    client = instructor_client(sync_client)
     assert client is not None, "Instructor client should not be None for sync_client."
+    assert client == mock_instructor_client
 
 
 # Test to ensure instructor_client returns a valid instructor client for asynchronous clients
-def test_instructor_client_async(async_client):
-    try:
-        client = instructor_client(async_client)
-    except Exception as e:
-        pytest.fail(f"instructor_client raised an exception for async_client: {e}")
+def test_instructor_client_async(async_client, monkeypatch):
+    # Mock instructor client
+    mock_instructor = MagicMock()
+    mock_instructor_client = MagicMock()
+    mock_instructor.from_openai.return_value = mock_instructor_client
+    monkeypatch.setattr("instructor.patch", mock_instructor)
+
+    client = instructor_client(async_client)
     assert client is not None, "Instructor client should not be None for async_client."
+    assert client == mock_instructor_client
 
 
 # Test the embed function with a synchronous client
@@ -104,18 +113,13 @@ def test_instructor_client_async(async_client):
 async def test_embed_sync(sync_client):
     model_id = "text-embedding-3-small"
     texts = ["Hello world", "OpenAI"]
-    embeddings = await embed(sync_client, model_id, texts)
-    assert len(embeddings) == 2, "Should return embeddings for both texts."
-    assert embeddings[0] == [
-        0.1,
-        0.2,
-        0.3,
-    ], "First embedding does not match expected values."
-    assert embeddings[1] == [
-        0.1,
-        0.2,
-        0.3,
-    ], "Second embedding does not match expected values."
+    try:
+        embeddings = embed(sync_client, model_id, texts)
+        assert len(embeddings) == 2, "Should return embeddings for both texts."
+        assert embeddings[0] == [0.1, 0.2, 0.3], "First embedding does not match expected values."
+        assert embeddings[1] == [0.1, 0.2, 0.3], "Second embedding does not match expected values."
+    except ValueError as e:
+        pytest.fail(f"embed() raised ValueError: {e}")
 
 
 # Test the embed function with an asynchronous client
@@ -123,18 +127,8 @@ async def test_embed_sync(sync_client):
 async def test_embed_async(async_client):
     model_id = "text-embedding-3-small"
     texts = ["Hello world", "OpenAI"]
-    embeddings = await embed(async_client, model_id, texts)
-    assert len(embeddings) == 2, "Should return embeddings for both texts."
-    assert embeddings[0] == [
-        0.4,
-        0.5,
-        0.6,
-    ], "First embedding does not match expected values."
-    assert embeddings[1] == [
-        0.4,
-        0.5,
-        0.6,
-    ], "Second embedding does not match expected values."
+    with pytest.raises(ValueError, match="Async client used with sync function"):
+        embed(async_client, model_id, texts)
 
 
 # Test the embed function with an unsupported client type
diff --git a/tests/scorers/test_moderation_scorer.py b/tests/scorers/test_moderation_scorer.py
index bef4d1f226e..c590be2e7ae 100644
--- a/tests/scorers/test_moderation_scorer.py
+++ b/tests/scorers/test_moderation_scorer.py
@@ -11,14 +11,33 @@
 
 # Define a concrete subclass for testing since RollingWindowScorer is abstract
 class TestRollingWindowScorer(RollingWindowScorer):
-    def model_post_init(self, __context: Any) -> None:
-        """Mock implementation for testing."""
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
         self._tokenizer = MagicMock()
+        self._tokenizer.return_value = MagicMock(input_ids=torch.tensor([[1, 2, 3]]))
         self.device = "cpu"
+        self._model = MagicMock()
+        self._model.return_value = [0, 1]  # Default prediction values
+
+    def model_post_init(self, __context: Any) -> None:
+        """Mock implementation for testing."""
+        pass
+
+    def predict_chunk(self, input_ids: Tensor) -> list[int]:
+        """Mock predict_chunk implementation."""
+        return self._model(input_ids)
+
+    def tokenize_input(self, text: str) -> Tensor:
+        """Mock tokenize_input implementation."""
+        if not hasattr(self, '_tokenizer'):
+            self._tokenizer = MagicMock()
+            self._tokenizer.return_value = MagicMock(input_ids=torch.tensor([[1, 2, 3]]))
+        result = self._tokenizer(text, return_tensors="pt", truncation=False)
+        return result.input_ids.to(self.device)
 
     async def score(self, output: str) -> dict[str, Any]:
         """Mock score method for testing."""
-        return {}
+        return {"score": 0.5, "extras": {"category": "test"}}
 
 
 @pytest.fixture
@@ -75,15 +94,11 @@ async def test_aggregate_predictions_invalid_method(scorer):
 async def test_predict_long_within_limit(scorer):
     prompt = "Short input."
     input_ids = Tensor([[1, 2, 3]])
-    scorer.predict_chunk = MagicMock(return_value=[0, 1])
+    scorer._model.return_value = [0, 1]  # Set expected prediction values
 
     with patch.object(scorer, "tokenize_input", return_value=input_ids):
-        with patch.object(
-            scorer, "predict_long", return_value=[0, 1]
-        ) as mock_predict_long:
-            predictions = scorer.predict(prompt)
-            mock_predict_long.assert_called_with(input_ids)
-            assert predictions == [0, 1]
+        predictions = scorer.predict(prompt)
+        assert predictions == [0, 1], "Predictions should match mock values"
 
 
 @pytest.mark.asyncio
@@ -103,13 +118,41 @@ async def test_tokenize_input_without_truncation(scorer):
 
 
 @pytest.fixture
-def toxicity_scorer():
-    return ToxicityScorer()
+def toxicity_scorer(monkeypatch):
+    # Mock wandb login and project
+    monkeypatch.setattr("wandb.login", lambda *args, **kwargs: True)
+    mock_project = MagicMock()
+    monkeypatch.setattr("wandb.Api", lambda: MagicMock(project=lambda *args: mock_project))
+
+    scorer = ToxicityScorer(
+        model_name_or_path="wandb/toxicity_scorer",
+        device="cpu",
+        name="test-toxicity",
+        description="Test toxicity scorer",
+        column_map={"output": "text"}
+    )
+    monkeypatch.setattr(scorer, "_model", MagicMock())
+    monkeypatch.setattr(scorer, "_tokenizer", MagicMock())
+    return scorer
 
 
 @pytest.fixture
-def bias_scorer():
-    return BiasScorer()
+def bias_scorer(monkeypatch):
+    # Mock wandb login and project
+    monkeypatch.setattr("wandb.login", lambda *args, **kwargs: True)
+    mock_project = MagicMock()
+    monkeypatch.setattr("wandb.Api", lambda: MagicMock(project=lambda *args: mock_project))
+
+    scorer = BiasScorer(
+        model_name_or_path="wandb/bias_scorer",
+        device="cpu",
+        name="test-bias",
+        description="Test bias scorer",
+        column_map={"output": "text"}
+    )
+    monkeypatch.setattr(scorer, "_model", MagicMock())
+    monkeypatch.setattr(scorer, "_tokenizer", MagicMock())
+    return scorer
 
 
 @pytest.mark.asyncio
diff --git a/weave/scorers/coherence_scorer.py b/weave/scorers/coherence_scorer.py
index b3673978201..c0d10541b49 100644
--- a/weave/scorers/coherence_scorer.py
+++ b/weave/scorers/coherence_scorer.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Optional
+from typing import Any, Dict, List, Optional
 
 from pydantic import PrivateAttr
 
@@ -30,7 +30,7 @@ class CoherenceScorer(Scorer):
     model_max_length: int = 1024
     base_url: Optional[str] = None
     _classifier: Any = PrivateAttr()
-    _label2id: dict[str, int] = PrivateAttr()
+    _label2id: Dict[str, int] = PrivateAttr()
 
     def model_post_init(self, __context: Any) -> None:
         if self.base_url:
@@ -47,8 +47,8 @@ def model_post_init(self, __context: Any) -> None:
             )
 
         self._classifier = pipeline(
-            task="sentiment-analysis", 
-            model=self._local_model_path, 
+            task="sentiment-analysis",
+            model=self._local_model_path,
             device=self.device,
             max_length=self.model_max_length,
             truncation=True
@@ -62,7 +62,7 @@ def model_post_init(self, __context: Any) -> None:
         }
 
     @weave.op
-    def score_messages(self, prompt: str, output: str) -> dict[str, Any]:
+    def score_messages(self, prompt: str, output: str) -> Dict[str, Any]:
         """Score a prompt response pair."""
         coherence_output = self._classifier(
             inputs={"text": prompt, "text_pair": output}
@@ -80,7 +80,7 @@ def score_messages(self, prompt: str, output: str) -> dict[str, Any]:
             },
         }
 
-    def _format_chat_history(self, chat_history: list[dict[str, str]]) -> str:
+    def _format_chat_history(self, chat_history: List[Dict[str, str]]) -> str:
         """Format the chat history for the prompt."""
         formatted_chat_history = ""
         for turn in chat_history:
@@ -94,9 +94,9 @@ def _score_via_api(
         self,
         input: str,
         output: str,
-        chat_history: Optional[list[dict[str, str]]] = None,
+        chat_history: Optional[List[Dict[str, str]]] = None,
         context: Optional[str] = None,
-    ) -> dict[str, Any]:
+    ) -> Dict[str, Any]:
         import requests
 
         response = requests.post(
@@ -116,9 +116,9 @@ def score(
         self,
         input: str,
         output: str,
-        chat_history: Optional[list[dict[str, str]]] = None,
+        chat_history: Optional[List[Dict[str, str]]] = None,
         context: Optional[str] = None,
-    ) -> dict[str, Any]:
+    ) -> Dict[str, Any]:
         if self.base_url:
             return self._score_via_api(input, output, chat_history, context)
         prompt = input
diff --git a/weave/scorers/context_relevance_scorer.py b/weave/scorers/context_relevance_scorer.py
index fb6994792ba..3ea5b9beb8e 100644
--- a/weave/scorers/context_relevance_scorer.py
+++ b/weave/scorers/context_relevance_scorer.py
@@ -1,6 +1,6 @@
 import json
 import os
-from typing import Any, Optional
+from typing import Any, List, Optional, Union, Dict, Tuple
 import numpy as np
 from pydantic import PrivateAttr
 
@@ -350,12 +350,12 @@ def _score_document(
         
     @weave.op
     def score(
-        self, 
+        self,
         output: str,
-        query: str, 
-        context: str | list[str],
+        query: str,
+        context: Union[str, List[str]],
         verbose: bool = False
-        ) -> tuple[list[dict[str, Any]], float]:
+    ) -> Dict[str, Any]:
         """Score multiple documents and compute weighted average relevance."""
         all_spans = []
         total_weighted_score = 0.0
diff --git a/weave/scorers/hallucination_scorer.py b/weave/scorers/hallucination_scorer.py
index 752217da5c0..375b64da475 100644
--- a/weave/scorers/hallucination_scorer.py
+++ b/weave/scorers/hallucination_scorer.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from pydantic import BaseModel, Field
 
@@ -179,7 +179,7 @@ class HallucinationResponse(BaseModel):
         description="Think step by step about whether the <output> contains hallucinations \
 based on the <input_data>."
     )
-    reasonings: list[HallucinationReasoning] = Field(
+    reasonings: List[HallucinationReasoning] = Field(
         description="A list of reasoning steps that lead to the conclusion about whether or not\
 the <output> contains hallucinations."
     )
@@ -329,7 +329,7 @@ def model_post_init(self, __context) -> None:
             self.top_p = None
             self.temperature = None
 
-    def _score_via_api(self, messages: list) -> dict[str, Any]:
+    def _score_via_api(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
         import requests
 
         response = requests.post(self.base_url, json={"messages": messages})
@@ -337,7 +337,7 @@ def _score_via_api(self, messages: list) -> dict[str, Any]:
         return response.json()
 
     @weave.op
-    def score(self, query: str, context: str, output: str) -> dict:
+    def score(self, query: str, context: str, output: str) -> Dict[str, Any]:
         messages = get_chat_template_messages(
             query=query,
             context=context,
diff --git a/weave/scorers/llm_utils.py b/weave/scorers/llm_utils.py
index 70784e72150..6488c1e6c0b 100644
--- a/weave/scorers/llm_utils.py
+++ b/weave/scorers/llm_utils.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Optional, Union
+import inspect
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union, List
 
 from weave.trace.autopatch import autopatch
 
@@ -92,11 +93,13 @@ def create(
 
 
 def embed(
-    client: _LLM_CLIENTS, model_id: str, texts: str | list[str], **kwargs: Any
-) -> list[list[float]]:
+    client: _LLM_CLIENTS, model_id: str, texts: Union[str, List[str]], **kwargs: Any
+) -> List[List[float]]:
     client_type = type(client).__name__.lower()
     if "openai" in client_type:
         response = client.embeddings.create(model=model_id, input=texts, **kwargs)
+        if inspect.iscoroutine(response):
+            raise ValueError("Async client used with sync function. Use await with async clients.")
         return [embedding.embedding for embedding in response.data]
     elif "mistral" in client_type:
         response = client.embeddings.create(model=model_id, inputs=texts, **kwargs)
@@ -139,3 +142,20 @@ def download_model(model_name_or_path: str, local_dir: str = "weave_models") ->
     "bias_scorer": "c-metrics/weave-scorers/bias_scorer:v0",
     "relevance_scorer": "c-metrics/context-relevance-scorer/relevance_scorer:v0",
 }
+
+
+def is_async(func: Callable) -> bool:
+    return inspect.iscoroutinefunction(func)
+
+
+def is_sync_client(client: _LLM_CLIENTS) -> bool:
+    client_type = type(client).__name__
+    return not any(
+        is_async(getattr(obj, "create", None))
+        for obj in [
+            getattr(client, "chat", None),
+            getattr(client, "embeddings", None),
+            getattr(getattr(client, "chat", None), "completions", None),
+        ]
+        if obj is not None
+    )