diff --git a/.gitignore b/.gitignore
index 3969475..500b88f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ RAG/tmp.py
 Carrot-Assistant/omop_tmp.py
 RAG/.cache/
 *.qdrant
+/Carrot-Assistant/tests/log
diff --git a/Carrot-Assistant/components/pipeline.py b/Carrot-Assistant/components/pipeline.py
index 29e8249..7c35f95 100644
--- a/Carrot-Assistant/components/pipeline.py
+++ b/Carrot-Assistant/components/pipeline.py
@@ -9,7 +9,6 @@
 from components.embeddings import Embeddings
 from components.models import get_model
 from components.prompt import Prompts
-from tests.test_prompt_build import mock_rag_results
 
 
 class llm_pipeline:
@@ -52,10 +51,12 @@ def get_simple_assistant(self) -> Pipeline:
         self._logger.info(f"Pipeline initialized in {time.time()-start} seconds")
 
         start = time.time()
-        pipeline.add_component("prompt", Prompts(
-            model_name=self._model_name,
-            eot_token=self._eot_token
-            ).get_prompt())
+        pipeline.add_component(
+            "prompt",
+            Prompts(
+                model_name=self._model_name, eot_token=self._eot_token
+            ).get_prompt(),
+        )
 
         self._logger.info(f"Prompt added to pipeline in {time.time()-start} seconds")
         start = time.time()
@@ -72,6 +73,7 @@ def get_simple_assistant(self) -> Pipeline:
         self._logger.info(f"Pipeline connected in {time.time()-start} seconds")
         return pipeline
+
     def get_rag_assistant(self) -> Pipeline:
         """
         Get an assistant that uses vector search to populate a prompt for an LLM
 
@@ -85,46 +87,50 @@ def get_rag_assistant(self) -> Pipeline:
         pipeline = Pipeline()
         self._logger.info(f"Pipeline initialized in {time.time()-start} seconds")
         start = time.time()
-
-
+
         vec_search = Embeddings(
-                embeddings_path=self._opt.embeddings_path,
-                force_rebuild=self._opt.force_rebuild,
-                embed_vocab=self._opt.embed_vocab,
-                model_name=self._opt.embedding_model,
-                search_kwargs=self._opt.embedding_search_kwargs
-                )
-
+            embeddings_path=self._opt.embeddings_path,
+            force_rebuild=self._opt.force_rebuild,
+            embed_vocab=self._opt.embed_vocab,
+            model_name=self._opt.embedding_model,
+            search_kwargs=self._opt.embedding_search_kwargs,
+        )
+
         vec_embedder = vec_search.get_embedder()
         vec_retriever = vec_search.get_retriever()
-        router = ConditionalRouter(routes=[
-            {
-                "condition": "{{vec_results[0].score > 0.95}}",
-                "output": "{{vec_results}}",
-                "output_name": "exact_match",
-                "output_type": List[Dict],
-            },
-            {
-                "condition": "{{vec_results[0].score <=0.95}}",
-                "output": "{{vec_results}}",
-                "output_name": "no_exact_match",
-                "output_type": List[Dict]
-            }
-        ])
+        router = ConditionalRouter(
+            routes=[
+                {
+                    "condition": "{{vec_results[0].score > 0.95}}",
+                    "output": "{{vec_results}}",
+                    "output_name": "exact_match",
+                    "output_type": List[Dict],
+                },
+                {
+                    "condition": "{{vec_results[0].score <=0.95}}",
+                    "output": "{{vec_results}}",
+                    "output_name": "no_exact_match",
+                    "output_type": List[Dict],
+                },
+            ]
+        )
         llm = get_model(
             model_name=self._model_name,
             temperature=self._opt.temperature,
            logger=self._logger,
         )
-
+
         pipeline.add_component("query_embedder", vec_embedder)
         pipeline.add_component("retriever", vec_retriever)
         pipeline.add_component("router", router)
-        pipeline.add_component("prompt", Prompts(
-            model_name=self._model_name,
-            prompt_type="top_n_RAG",
-            eot_token=self._eot_token
-            ).get_prompt())
+        pipeline.add_component(
+            "prompt",
+            Prompts(
+                model_name=self._model_name,
+                prompt_type="top_n_RAG",
+                eot_token=self._eot_token,
+            ).get_prompt(),
+        )
         pipeline.add_component("llm", llm)
 
         pipeline.connect("query_embedder.embedding", "retriever.query_embedding")
diff --git a/Carrot-Assistant/evaluation/evaltypes.py b/Carrot-Assistant/evaluation/evaltypes.py
index bc0ec7b..ad9d343 100644
--- a/Carrot-Assistant/evaluation/evaltypes.py
+++ b/Carrot-Assistant/evaluation/evaltypes.py
@@ -1,18 +1,23 @@
 from abc import ABC, abstractmethod
-from typing import TypeVar, Generic
+from typing import TypeVar, Generic, Any
+
 
 class EvaluationFramework:
-    def __init__(self, results_file='results.json'):
+    def __init__(self, results_file="results.json"):
         self.results_file = results_file
+
     def run_evaluations(self):
         # Run some tests
         self._save_evaluations
+
     def _save_evaluations(self):
-        # Append to 'results.json' 
+        # Append to 'results.json'
         pass
 
+
 class Metric(ABC):
     """Base class for all metrics."""
+
     @abstractmethod
     def calculate(self, *args, **kwargs) -> float:
         """
@@ -20,74 +25,95 @@ def calculate(self, *args, **kwargs) -> float:
         """
         pass
 
+
 class TestPipeline(ABC):
-    """
-    Base class for Pipeline runs
-    """
-    @abstractmethod
-    def run(self, *args, **kwargs):
-        """
-        Run the pipeline
-        """
-        pass
-
-M = TypeVar('M', bound=Metric)
+    """
+    Base class for Pipeline runs
+    """
+
+    @abstractmethod
+    def run(self, *args, **kwargs) -> Any:
+        """
+        Run the pipeline
+        """
+        ...
+
+
+M = TypeVar("M", bound=Metric)
+
 
 class PipelineTest(Generic[M]):
     """
     Base class for Pipeline tests
     """
+
     def __init__(self, name: str, pipeline: TestPipeline, metrics: list[M]):
+        self.name = name
         self.pipeline = pipeline
         self.metrics = metrics
 
     @abstractmethod
     def run_pipeline(self, *args, **kwargs):
         pass
+
     @abstractmethod
     def evaluate(self, *args, **kwargs) -> dict[str, float]:
-        pass 
+        pass
 
+
 class SingleResultMetric(Metric):
     """Metric for evaluating pipelines that return a single result."""
 
+
 class InformationRetrievalMetric(Metric):
     """Metric for evaluating information retrieval pipelines."""
+
     pass
 
+
 class SingleResultPipeline(TestPipeline):
-    """
-    Base class for pipelines returning a single result
-    """
+    """
+    Base class for pipelines returning a single result
+    """
+
 
 class SingleResultPipelineTest(PipelineTest[SingleResultMetric]):
-    def __init__(self, name: str, pipeline: SingleResultPipeline, metrics: list[SingleResultMetric]):
+    def __init__(
+        self,
+        name: str,
+        pipeline: SingleResultPipeline,
+        metrics: list[SingleResultMetric],
+    ):
         super().__init__(name, pipeline, metrics)
-
+
     def run_pipeline(self, input_data):
         """
         Run the pipeline with the given input data.
-
+
         Args:
            input_data: The input data for the pipeline.
-
+
         Returns:
             The result of running the pipeline on the input data.
         """
         return self.pipeline.run(input_data)
-
+
     def evaluate(self, input_data, expected_output):
         """
         Evaluate the pipeline by running it on the input data and comparing
         the result to the expected output using all metrics.
-
+
         Args:
             input_data: The input data for the pipeline.
             expected_output: The expected output to compare against.
-
+
         Returns:
             A dictionary mapping metric names to their calculated values.
         """
         pipeline_output = self.run_pipeline(input_data)
-        return {metric.__class__.__name__: metric.calculate(pipeline_output, expected_output)
-                for metric in self.metrics}
+        return {
+            metric.__class__.__name__: metric.calculate(
+                pipeline_output, expected_output
+            )
+            for metric in self.metrics
+        }
diff --git a/Carrot-Assistant/evaluation/pipelines.py b/Carrot-Assistant/evaluation/pipelines.py
new file mode 100644
index 0000000..30f3066
--- /dev/null
+++ b/Carrot-Assistant/evaluation/pipelines.py
@@ -0,0 +1,47 @@
+from typing import Dict
+from evaluation.evaltypes import SingleResultPipeline
+from options.pipeline_options import LLMModel
+from components.models import local_models
+from jinja2 import Template
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+
+
+class LLMPipeline(SingleResultPipeline):
+    """
+    This class runs a simple LLM-only pipeline on provided input
+    """
+
+    def __init__(self, llm: LLMModel, prompt_template: Template) -> None:
+        """
+        Initialises the LLMPipeline class
+
+        Parameters
+        ----------
+        llm: LLMModel
+            One of the model options in the LLMModel enum
+        prompt_template: Template
+            A jinja2 template for a prompt
+        """
+        self.llm = llm
+        self.prompt_template = prompt_template
+        self._model = Llama(hf_hub_download(**local_models[self.llm.value]))
+
+    def run(self, input: Dict[str, str]) -> str:
+        """
+        Runs the LLMPipeline on a given input
+
+        Parameters
+        ----------
+        input: Dict[str, str]
+            The input is rendered into a prompt string by the .render method of the prompt template, so needs to be a dictionary of the template's parameters
+
+        Returns
+        -------
+        str
+            The output of running the prompt through the given model
+        """
+        prompt = self.prompt_template.render(input)
+        return self._model.create_chat_completion(
+            messages=[{"role": "user", "content": prompt}]
+        )["choices"][0]["message"]["content"]
diff --git a/Carrot-Assistant/tests/test_evals.py b/Carrot-Assistant/tests/test_evals.py
index f7f4c9c..f2ab72b 100644
--- a/Carrot-Assistant/tests/test_evals.py
+++ b/Carrot-Assistant/tests/test_evals.py
@@ -1,18 +1,26 @@
 import pytest
+from jinja2 import Environment, Template
+
 from evaluation.evaltypes import SingleResultPipeline, SingleResultPipelineTest
 from evaluation.metrics import ExactMatchMetric
+from evaluation.pipelines import LLMPipeline
+
+from options.pipeline_options import LLMModel
+
+
 
 class IdentityPipeline(SingleResultPipeline):
     def run(self, input_data):
         return input_data
 
+
 class ExactMatchTest(SingleResultPipelineTest):
     def __init__(self, name: str, pipeline: SingleResultPipeline):
         super().__init__(name, pipeline, [ExactMatchMetric()])
-
     def run_pipeline(self, input_data):
         return self.pipeline.run(input_data)
-
+
+
 class TestExactMatch:
     @pytest.fixture
     def identity_pipeline(self):
@@ -20,7 +28,9 @@ def identity_pipeline(self):
 
     @pytest.fixture
     def exact_match_test(self, identity_pipeline):
-        return SingleResultPipelineTest("Exact Match Test", identity_pipeline, [ExactMatchMetric()])
+        return SingleResultPipelineTest(
+            "Exact Match Test", identity_pipeline, [ExactMatchMetric()]
+        )
 
     @pytest.fixture
     def all_match_dataset(self):
@@ -32,12 +42,20 @@ def no_match_dataset(self):
 
     @pytest.fixture
     def half_match_dataset(self):
-        return [("input1", "input1"), ("input2", "output2"), ("input3", "input3"), ("input4", "output4")]
+        return [
+            ("input1", "input1"),
+            ("input2", "output2"),
+            ("input3", "input3"),
+            ("input4", "output4"),
+        ]
 
     def run_test(self, test, dataset):
-        results = [test.evaluate(input_data, expected_output) for input_data, expected_output in dataset]
-        exact_match_results = [result['ExactMatchMetric'] for result in results]
-        return sum(exact_match_results) / len(exact_match_results)
+        results = [
+            test.evaluate(input_data, expected_output)
+            for input_data, expected_output in dataset
+        ]
+        exact_match_results = [result["ExactMatchMetric"] for result in results]
+        return sum(exact_match_results) / len(exact_match_results)
 
     def test_all_match(self, exact_match_test, all_match_dataset):
         assert self.run_test(exact_match_test, all_match_dataset) == 1.0
@@ -47,3 +65,28 @@ def test_no_match(self, exact_match_test, no_match_dataset):
 
     def test_half_match(self, exact_match_test, half_match_dataset):
         assert self.run_test(exact_match_test, half_match_dataset) == 0.5
+
+
+# LLM pipeline tests
+
+
+class TestBasicLLM:
+    @pytest.fixture
+    def llm_prompt(self):
+        env = Environment()
+        template = env.from_string(
+            """
+            You are a parrot that repeats whatever is said to you, with no explanation. You will be given a sentence as input, repeat it.
+
+            Sentence: {{input_sentence}}
+            """
+        )
+        return template
+
+    @pytest.fixture
+    def llm_pipeline(self, llm_prompt):
+        return LLMPipeline(LLMModel.LLAMA_3_1_8B, llm_prompt)
+
+    def test_returns_string(self, llm_pipeline):
+        model_output = llm_pipeline.run({"input_sentence": "Polly wants a cracker"})
+        assert isinstance(model_output, str)
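
For reference, a minimal usage sketch (illustrative only, not part of the patch) of how the pieces introduced here compose: an LLMPipeline built from a jinja2 template, wrapped in a SingleResultPipelineTest and scored with ExactMatchMetric. It assumes ExactMatchMetric.calculate returns 1.0 for identical strings and that the weights behind LLMModel.LLAMA_3_1_8B are downloadable via hf_hub_download.

# Illustrative sketch only -- follows the interfaces added in this diff;
# ExactMatchMetric, LLMModel and the model weights come from the existing
# codebase and are assumed to be available.
from jinja2 import Environment

from evaluation.evaltypes import SingleResultPipelineTest
from evaluation.metrics import ExactMatchMetric
from evaluation.pipelines import LLMPipeline
from options.pipeline_options import LLMModel

# The template's variables become the keys of the dict passed to run().
template = Environment().from_string(
    "Repeat the following sentence exactly.\n\nSentence: {{input_sentence}}"
)

# Constructing the pipeline downloads the GGUF weights via hf_hub_download.
pipeline = LLMPipeline(LLMModel.LLAMA_3_1_8B, template)

# Wrap the pipeline in a test; evaluate() runs it and applies every metric.
test = SingleResultPipelineTest("parrot test", pipeline, [ExactMatchMetric()])
scores = test.evaluate(
    {"input_sentence": "Polly wants a cracker"},
    "Polly wants a cracker",
)
print(scores)  # e.g. {"ExactMatchMetric": 1.0} if the model repeats verbatim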