Merge pull request #24 from arthur-ai/develop

release: full local UI
arthur-ai · Aug 8, 2023 · 0aa7ed0 · 0aa7ed0
2 parents b8a6219 + 10e31c6
commit 0aa7ed0
Show file tree

Hide file tree

Showing 371 changed files with 20,472 additions and 2,944 deletions.
diff --git a/.gitignore b/.gitignore
@@ -26,4 +26,6 @@ include/
 *.egg-info/
 .installed.cfg
 *.egg
-
+arthur_bench/server/js/dist/*
+arthur_bench/server/js/node_modules/*
+arthur_bench/server/js/packages/*/node_modules/
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1 +1 @@
-recursive-include arthur_bench/server/html/*
+recursive-include arthur_bench/server/js/dist *
diff --git a/README.md b/README.md
@@ -9,19 +9,28 @@ Bench is built for evaluating LLMs for production use cases. Bench can be used f
 ## Getting started
 
 ### Package installation and environment setup
-Install Bench with minimum dependencies:
-`pip install -e .`
+First, download the tar file from the GitHub releases page. Next, install the package into your Python environment.
+
+Install Bench with optional dependencies for serving results locally (recommended):  
+`pip install --find-links=./directory_with_tar_file 'arthur-bench[server]'`
 
-Install Bench with optional dependencies for serving results locally:  
-`pip install -e '.[server]'`
+Install Bench with minimum dependencies:
+`pip install --find-links=./directory_with_tar_file 'arthur-bench'`
 
 Bench saves test suites and test runs to the directory specified by the `BENCH_FILE_DIR`, which defaults to `./bench`
 
-#### Viewing Examples
-To explore Bench suites and runs for an example datasets, run `bench --directory examples/bench`. This will spin up a server where you can view sample created Test Suites and evaluate Runs across different model and prompt configurations.
+#### Exploring the UI with Examples
+The following commands will spin up a local UI serving two example test suites we've added:
+
+```
+git clone git@github.com:arthur-ai/bench.git
+cd bench/examples  # navigate to the examples directory inside the bench repo
+bench
+```
+This will spin up a server where you can view sample created Test Suites and evaluate Runs across different model and prompt configurations.
 
 In the `examples/` folder, you will find demo notebooks used to generate the Test Suites and Run results recorded in the directory. 
-**Running these notebooks directly, without deleting the pre-existing results from the directory, will result in errors.** Please use these as a a reference in creating your own Test Suites and Runs.
+**To run these notebooks directly, set `BENCH_FILE_DIR` to a new directory in the top cell of the notebook.** Please use these as a reference in creating your own Test Suites and Runs.
 
 ## Key Concepts
 

diff --git a/arthur_bench/client/bench_client.py b/arthur_bench/client/bench_client.py
@@ -0,0 +1,86 @@
+from typing import Optional, TypeVar
+from abc import ABC, abstractmethod
+
+from arthur_bench.models.models import (
+    PaginatedTestSuites,
+    CreateRunResponse,
+    CreateRunRequest,
+    PaginatedRuns,
+    PaginatedRun,
+    TestSuiteRequest,
+    PaginatedTestSuite,
+    TestSuiteSummary,
+)
+
+# Type variable constrained (via ``bound``) to BenchClient and its subclasses.
+# The bound is a string forward reference because BenchClient is defined below.
+TBenchClient = TypeVar("TBenchClient", bound="BenchClient")
+
+
+class BenchClient(ABC):
+    """
+    Abstract interface for working with test suites and test runs.
+
+    Every method is declared with ``@abstractmethod`` and its body only raises
+    ``NotImplementedError``; concrete subclasses must override all of them.
+    Methods that return paginated models accept ``page`` (default 1) and
+    ``page_size`` (default 5).
+    """
+
+    @abstractmethod
+    def get_test_suites(
+        self,
+        name: Optional[str] = None,
+        sort: Optional[str] = None,
+        scoring_method: Optional[str] = None,
+        page: int = 1,
+        page_size: int = 5
+    ) -> PaginatedTestSuites:
+        """
+        Return one page of test suites.
+
+        :param name: optional; presumably filters suites by name -- exact
+            semantics are defined by the implementing subclass
+        :param sort: optional sort specification (format not defined here)
+        :param scoring_method: optional; presumably filters suites by scoring
+            method -- confirm against implementations
+        :param page: page to return, defaults to 1
+        :param page_size: page size, defaults to 5
+        :return: a PaginatedTestSuites model
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def create_test_suite(self, json_body: TestSuiteRequest) -> PaginatedTestSuite:
+        """
+        Create a test suite from the given request model.
+
+        :param json_body: the TestSuiteRequest describing the suite to create
+        :return: a PaginatedTestSuite model
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_test_suite(
+        self, 
+        test_suite_id: str,
+        page: int = 1,
+        page_size: int = 5) -> PaginatedTestSuite:
+        """
+        Return one page of a single test suite.
+
+        :param test_suite_id: identifier of the test suite
+        :param page: page to return, defaults to 1
+        :param page_size: page size, defaults to 5
+        :return: a PaginatedTestSuite model
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_runs_for_test_suite(
+        self, 
+        test_suite_id: str, 
+        sort: Optional[str] = None,
+        page: int = 1,
+        page_size: int = 5
+    ) -> PaginatedRuns:
+        """
+        Return one page of the runs belonging to a test suite.
+
+        :param test_suite_id: identifier of the test suite
+        :param sort: optional sort specification (format not defined here)
+        :param page: page to return, defaults to 1
+        :param page_size: page size, defaults to 5
+        :return: a PaginatedRuns model
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def create_new_test_run(
+        self, test_suite_id: str, json_body: CreateRunRequest
+    ) -> CreateRunResponse:
+        """
+        Create a new run for the given test suite.
+
+        :param test_suite_id: identifier of the test suite the run belongs to
+        :param json_body: the CreateRunRequest describing the run to create
+        :return: a CreateRunResponse model
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_test_run(
+        self,
+        test_suite_id: str,
+        test_run_id: str,
+        page: int = 1,
+        page_size: int = 5,
+        sort: Optional[bool] = None,
+    ) -> PaginatedRun:
+        """
+        Return one page of a single test run.
+
+        :param test_suite_id: identifier of the test suite
+        :param test_run_id: identifier of the run within that suite
+        :param page: page to return, defaults to 1
+        :param page_size: page size, defaults to 5
+        :param sort: optional sort flag.
+            NOTE(review): typed ``Optional[bool]`` here, while the other
+            methods of this class type ``sort`` as ``Optional[str]`` --
+            confirm this difference is intended.
+        :return: a PaginatedRun model
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_summary_statistics(
+        self,
+        test_suite_id: str,
+        run_id: Optional[str] = None,
+        page: int = 1,
+        page_size: int = 5
+    ) -> TestSuiteSummary:
+        """
+        Return summary statistics for a test suite.
+
+        :param test_suite_id: identifier of the test suite
+        :param run_id: optional run identifier; how it affects the summary is
+            defined by the implementing subclass
+        :param page: page to return, defaults to 1
+        :param page_size: page size, defaults to 5
+        :return: a TestSuiteSummary model
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def delete_test_suite(self, test_suite_id: str):
+        """
+        Delete a test suite.
+
+        :param test_suite_id: identifier of the test suite to delete
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def delete_test_run(self, test_suite_id: str, test_run_id: str):
+        """
+        Delete a run from a test suite.
+
+        :param test_suite_id: identifier of the test suite
+        :param test_run_id: identifier of the run to delete
+        """
+        raise NotImplementedError