From 2b5846b825eddc1810ca3e5a576ea673a4b4148b Mon Sep 17 00:00:00 2001
From: AndreiCautisanu <30831438+AndreiCautisanu@users.noreply.github.com>
Date: Thu, 10 Oct 2024 14:21:47 +0300
Subject: [PATCH] [OPIK-194] rest of e2e sanity tests, docstrings and
 architecture cleanup (#364)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* trace details test and basic dataset test

* cleanup and finishing up

* changed name of job

* removed on:push

---------

Co-authored-by: Andrei Căutișanu <andreicautisanu@Andreis-MacBook-Pro.local>
---
 .github/workflows/sanity.yml                  |   2 +-
 .../application_sanity/conftest.py            |  58 +++++++-
 .../application_sanity/test_sanity.py         | 126 ++++++++++++++----
 tests_end_to_end/page_objects/DatasetsPage.py |  17 +++
 .../page_objects/ExperimentsPage.py           |  12 ++
 .../page_objects/IndividualDatasetPage.py     |   9 ++
 tests_end_to_end/page_objects/TracesPage.py   |   1 -
 .../page_objects/TracesPageSpansMenu.py       |  23 +++-
 8 files changed, 216 insertions(+), 32 deletions(-)
 create mode 100644 tests_end_to_end/page_objects/DatasetsPage.py
 create mode 100644 tests_end_to_end/page_objects/ExperimentsPage.py
 create mode 100644 tests_end_to_end/page_objects/IndividualDatasetPage.py

diff --git a/.github/workflows/sanity.yml b/.github/workflows/sanity.yml
index aaf0517947..5a1700bfdc 100644
--- a/.github/workflows/sanity.yml
+++ b/.github/workflows/sanity.yml
@@ -4,7 +4,7 @@ on:
     workflow_dispatch:
 
 jobs:
-    test_installation:
+    e2e_sanity:
         runs-on: ubuntu-20.04
 
         steps:
diff --git a/tests_end_to_end/application_sanity/conftest.py b/tests_end_to_end/application_sanity/conftest.py
index 6ef87c9133..815327580c 100644
--- a/tests_end_to_end/application_sanity/conftest.py
+++ b/tests_end_to_end/application_sanity/conftest.py
@@ -2,10 +2,17 @@
 import os
 import opik
 import yaml
+import json
 from opik.configurator.configure import configure
 from opik.evaluation import evaluate
 from opik.evaluation.metrics import Contains, Equals
 from opik import opik_context, track, DatasetItem
+from playwright.sync_api import Page
+
+from page_objects.ProjectsPage import ProjectsPage
+from page_objects.TracesPage import TracesPage
+from page_objects.DatasetsPage import DatasetsPage
+from page_objects.ExperimentsPage import ExperimentsPage
 
 
 @pytest.fixture(scope='session', autouse=True)
@@ -30,6 +37,42 @@ def client(config):
     return opik.Opik(project_name=config['project']['name'], host='http://localhost:5173/api')
 
 
+@pytest.fixture(scope='function')
+def projects_page(page: Page):
+    projects_page = ProjectsPage(page)
+    projects_page.go_to_page()
+    return projects_page
+    
+
+@pytest.fixture(scope='function')
+def projects_page_timeout(page: Page):
+    projects_page = ProjectsPage(page)
+    projects_page.go_to_page()
+    projects_page.page.wait_for_timeout(7000)
+    return projects_page
+
+
+@pytest.fixture(scope='function')
+def traces_page(page: Page, projects_page, config):
+    projects_page.click_project(config['project']['name'])
+    traces_page = TracesPage(page)
+    return traces_page
+
+
+@pytest.fixture(scope='function')
+def datasets_page(page: Page):
+    datasets_page = DatasetsPage(page)
+    datasets_page.go_to_page()
+    return datasets_page
+
+
+@pytest.fixture(scope='function')
+def experiments_page(page: Page):
+    experiments_page = ExperimentsPage(page)
+    experiments_page.go_to_page()
+    return experiments_page
+
+
 @pytest.fixture(scope='module')
 def log_traces_and_spans_low_level(client, config):
     """
@@ -126,7 +169,7 @@ def make_trace(x):
         make_trace(x)
 
 
-@pytest.fixture(scope='function')
+@pytest.fixture(scope='module')
 def dataset(config, client):
     dataset_config = {
         'name': config['dataset']['name'],
@@ -141,7 +184,7 @@ def dataset(config, client):
     return dataset
 
 
-@pytest.fixture(scope='function')
+@pytest.fixture(scope='module')
 def create_experiments(config, dataset):
     exp_config = {
         'prefix': config['experiments']['prefix'],
@@ -186,3 +229,14 @@ def eval_equals(x: DatasetItem):
         scoring_metrics=[equals_metric]
     )
     
+
+@pytest.fixture(scope='function')
+def dataset_content(config):
+    '''Reads the JSONL file named by config['dataset']['filename'] from this file's directory and returns the parsed lines as a list'''
+    curr_dir = os.path.dirname(__file__)
+    dataset_filepath = os.path.join(curr_dir, config['dataset']['filename'])
+
+    # Explicit UTF-8 keeps parsing independent of the platform's default encoding
+    with open(dataset_filepath, 'r', encoding='utf-8') as f:
+        # Blank lines (e.g. a stray trailing newline) are skipped instead of crashing json.loads
+        return [json.loads(line) for line in f if line.strip()]
\ No newline at end of file
diff --git a/tests_end_to_end/application_sanity/test_sanity.py b/tests_end_to_end/application_sanity/test_sanity.py
index d940228f13..3b3badbd2f 100644
--- a/tests_end_to_end/application_sanity/test_sanity.py
+++ b/tests_end_to_end/application_sanity/test_sanity.py
@@ -1,28 +1,31 @@
 import pytest
+import json
+
 from playwright.sync_api import Page, expect
-from page_objects.ProjectsPage import ProjectsPage
-from page_objects.TracesPage import TracesPage
+
 from page_objects.TracesPageSpansMenu import TracesPageSpansMenu
+from page_objects.IndividualDatasetPage import IndividualDatasetPage
+
 
+def test_project_name(projects_page_timeout, log_traces_and_spans_decorator, log_traces_and_spans_low_level):
+    '''
+    Checks that the project created via the fixtures exists
+    Waits 7 seconds (via the projects_page_timeout fixture) for the traces to show up in UI for later tests (TODO: figure out a better way to do this)
 
-def test_project_name(page: Page, log_traces_and_spans_decorator, log_traces_and_spans_low_level):
-    projects_page = ProjectsPage(page)
-    projects_page.go_to_page()
-    projects_page.check_project_exists('test-project')
+    1. Open projects page
+    2. Check the created project exists
+    '''
+    projects_page_timeout.check_project_exists('test-project')
 
 
-def test_traces_created(page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
-    #navigate to project
-    projects_page = ProjectsPage(page)
-    projects_page.go_to_page()
-    
-    #wait for data to actually arrive to the frontend
-    #TODO: replace this with a smarter waiting mechanism
-    page.wait_for_timeout(5000)
-    projects_page.click_project(config['project']['name'])
+def test_traces_created(traces_page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
+    '''
+    Checks that every trace defined in the sanity_config file is present in the project
 
-    #grab all traces of project
-    traces_page = TracesPage(page)
+    1. Open the traces page of the project
+    2. Grab all the names of the traces (should never set more than 15 in config so 1 page is safe)
+    3. Check that every possible name of the traces as defined in sanity_config.yaml is present in the names list
+    '''
     trace_names = traces_page.get_all_trace_names()
 
     client_prefix = config['traces']['client']['prefix']
@@ -33,16 +36,13 @@ def test_traces_created(page, config, log_traces_and_spans_low_level, log_traces
             assert prefix+str(count) in trace_names
 
 
-def test_spans_of_traces(page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
-    projects_page = ProjectsPage(page)
-    projects_page.go_to_page()
-    
-    #wait for data to actually arrive to the frontend
-    #TODO: replace this with a smarter waiting mechanism
-    projects_page.click_project(config['project']['name'])
-
-    #grab all traces of project
-    traces_page = TracesPage(page)
+def test_spans_of_traces(page, traces_page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
+    '''
+    Checks that every trace has the correct number and names of spans defined in the sanity_config.yaml file
+    1. Open the traces page of the project
+    2. Go through each trace and click it
+    3. Check that the spans are present in each trace
+    '''
     trace_names = traces_page.get_all_trace_names()
 
     for trace in trace_names:
@@ -54,3 +54,75 @@ def test_spans_of_traces(page, config, log_traces_and_spans_low_level, log_trace
             spans_menu.check_span_exists_by_name(f'{prefix}{count}')
 
 
+def test_trace_and_span_details(page, traces_page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator):
+    '''
+    Checks that for each trace and spans, the attributes defined in sanity_config.yaml are present
+    1. Go through each trace of the project
+    2. Check the created tags are present
+    3. Check the created feedback scores are present
+    4. Check the defined metadata is present
+    5. Go through each span of the traces and repeat 3-4 (tags are only checked on the trace itself)
+    '''
+    trace_names = traces_page.get_all_trace_names()
+
+    for trace in trace_names:
+        page.get_by_text(trace).click()
+        spans_menu = TracesPageSpansMenu(page)
+        trace_type = trace.split('-')[0]
+        trace_config = config['traces'][trace_type]
+
+        for tag in trace_config['tags']:
+            spans_menu.check_tag_exists_by_name(tag)
+
+        spans_menu.get_feedback_scores_tab().click()
+
+        for score, value in trace_config['feedback-scores'].items():
+            expect(page.get_by_role('cell', name=score, exact=True)).to_be_visible()
+            expect(page.get_by_role('cell', name=str(value), exact=True)).to_be_visible()
+        # Metadata text is built from plain locals: reusing the same quote type inside an f-string needs Python 3.12+
+        spans_menu.get_metadata_tab().click()
+        for md_key, md_value in trace_config['metadata'].items():
+            expect(page.get_by_text(f'{md_key}: {md_value}')).to_be_visible()
+
+        for count in range(config['spans']['count']):
+            span_config = config['spans'][trace_type]
+            spans_menu.get_first_span_by_name(f"{span_config['prefix']}{count}").click()
+
+            spans_menu.get_feedback_scores_tab().click()
+            for score, value in span_config['feedback-scores'].items():
+                expect(page.get_by_role('cell', name=score, exact=True)).to_be_visible()
+                expect(page.get_by_role('cell', name=str(value), exact=True)).to_be_visible()
+
+            spans_menu.get_metadata_tab().click()
+            for md_key, md_value in span_config['metadata'].items():
+                expect(page.get_by_text(f'{md_key}: {md_value}')).to_be_visible()
+
+
+def test_dataset_name(datasets_page, config, dataset):
+    '''
+    Checks that the dataset created via the fixture as defined in sanity_config.yaml is present on the datasets page
+    '''
+    datasets_page.check_dataset_exists_by_name(config['dataset']['name'])
+
+
+def test_dataset_items(page: Page, datasets_page, config, dataset, dataset_content):
+    '''
+    Checks that the items defined in sanity_dataset.jsonl are present within the dataset (the dataset fixture is requested so its setup runs even when this test is executed on its own)
+    '''
+    datasets_page.select_database_by_name(config['dataset']['name'])
+    # The expected cell text is the json.dumps output with a space padded inside each brace
+    individual_dataset_page = IndividualDatasetPage(page)
+    for item in dataset_content:
+        individual_dataset_page.check_cell_exists_by_text(json.dumps(item['input']).replace('{', '{ ').replace('}', ' }'))
+        individual_dataset_page.check_cell_exists_by_text(json.dumps(item['expected_output']).replace('{', '{ ').replace('}', ' }'))
+        
+
+def test_experiments_exist(experiments_page, config, create_experiments):
+    '''
+    Checks that the experiments created via the fixture are present and have the correct values for the metrics (experiments defined in a way to always return the same results)
+    '''
+    experiments_page.check_experiment_exists_by_name('test-experiment-Equals')
+    expect(experiments_page.page.get_by_text('Equals0').first).to_be_visible()
+
+    experiments_page.check_experiment_exists_by_name('test-experiment-Contains')
+    expect(experiments_page.page.get_by_text('Contains1').first).to_be_visible()
diff --git a/tests_end_to_end/page_objects/DatasetsPage.py b/tests_end_to_end/page_objects/DatasetsPage.py
new file mode 100644
index 0000000000..d0509efba4
--- /dev/null
+++ b/tests_end_to_end/page_objects/DatasetsPage.py
@@ -0,0 +1,17 @@
+from playwright.sync_api import Page, expect
+
+class DatasetsPage:
+    def __init__(self, page: Page):
+        self.page = page
+        self.url = '/default/datasets'
+
+    def go_to_page(self):
+        self.page.goto(self.url)
+
+    def select_database_by_name(self, name):
+        self.page.get_by_text(name, exact=True).first.click()
+
+    def check_dataset_exists_by_name(self, dataset_name):
+        expect(self.page.get_by_text(dataset_name)).to_be_visible()
+
+    
\ No newline at end of file
diff --git a/tests_end_to_end/page_objects/ExperimentsPage.py b/tests_end_to_end/page_objects/ExperimentsPage.py
new file mode 100644
index 0000000000..0903206b66
--- /dev/null
+++ b/tests_end_to_end/page_objects/ExperimentsPage.py
@@ -0,0 +1,12 @@
+from playwright.sync_api import Page, expect
+
+class ExperimentsPage:
+    def __init__(self, page: Page):
+        self.page = page
+        self.url = '/default/experiments'
+
+    def go_to_page(self):
+        self.page.goto(self.url)
+    
+    def check_experiment_exists_by_name(self, name):
+        expect(self.page.get_by_text(name).first).to_be_visible()
\ No newline at end of file
diff --git a/tests_end_to_end/page_objects/IndividualDatasetPage.py b/tests_end_to_end/page_objects/IndividualDatasetPage.py
new file mode 100644
index 0000000000..00c3084433
--- /dev/null
+++ b/tests_end_to_end/page_objects/IndividualDatasetPage.py
@@ -0,0 +1,9 @@
+from playwright.sync_api import Page, expect
+
+class IndividualDatasetPage:
+    def __init__(self, page: Page):
+        self.page = page
+        self.traces_table = page.get_by_role('table')
+
+    def check_cell_exists_by_text(self, text):
+        expect(self.traces_table.get_by_text(text)).to_be_visible()
\ No newline at end of file
diff --git a/tests_end_to_end/page_objects/TracesPage.py b/tests_end_to_end/page_objects/TracesPage.py
index 2df04eab05..af9f784e10 100644
--- a/tests_end_to_end/page_objects/TracesPage.py
+++ b/tests_end_to_end/page_objects/TracesPage.py
@@ -8,6 +8,5 @@ def __init__(self, page: Page):
 
     def get_all_trace_names(self):
         self.page.wait_for_selector(self.trace_names_selector)
-
         names = self.page.locator(self.trace_names_selector).all_inner_texts()
         return names
\ No newline at end of file
diff --git a/tests_end_to_end/page_objects/TracesPageSpansMenu.py b/tests_end_to_end/page_objects/TracesPageSpansMenu.py
index a0a495ebc4..853e663230 100644
--- a/tests_end_to_end/page_objects/TracesPageSpansMenu.py
+++ b/tests_end_to_end/page_objects/TracesPageSpansMenu.py
@@ -3,6 +3,27 @@
 class TracesPageSpansMenu:
     def __init__(self, page: Page):
         self.page = page
+        self.input_output_tab = 'Input/Output'
+        self.feedback_scores_tab = 'Feedback scores'
+        self.metadata_tab = 'Metadata'
+
+    def get_first_trace_by_name(self, name):
+        return self.page.get_by_role('button', name=name).first
+    
+    def get_first_span_by_name(self, name):
+        return self.page.get_by_role('button', name=name).first
 
     def check_span_exists_by_name(self, name):
-        expect(self.page.get_by_role('button', name=name)).to_be_visible()
\ No newline at end of file
+        expect(self.page.get_by_role('button', name=name)).to_be_visible()
+
+    def check_tag_exists_by_name(self, tag_name):
+        expect(self.page.get_by_text(tag_name)).to_be_visible()
+
+    def get_input_output_tab(self):
+        return self.page.get_by_role('tab', name=self.input_output_tab)
+
+    def get_feedback_scores_tab(self):
+        return self.page.get_by_role('tab', name=self.feedback_scores_tab)
+    
+    def get_metadata_tab(self):
+        return self.page.get_by_role('tab', name=self.metadata_tab)  # label comes from __init__, same as the other tab getters
\ No newline at end of file