From 2b5846b825eddc1810ca3e5a576ea673a4b4148b Mon Sep 17 00:00:00 2001 From: AndreiCautisanu <30831438+AndreiCautisanu@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:21:47 +0300 Subject: [PATCH] [OPIK-194] rest of e2e sanity tests, docstrings and architecture cleanup (#364) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * trace details test and basic dataset test * cleanup and finishing up * changed name of job * removed on:push --------- Co-authored-by: Andrei Căutișanu --- .github/workflows/sanity.yml | 2 +- .../application_sanity/conftest.py | 58 +++++++- .../application_sanity/test_sanity.py | 126 ++++++++++++++---- tests_end_to_end/page_objects/DatasetsPage.py | 17 +++ .../page_objects/ExperimentsPage.py | 12 ++ .../page_objects/IndividualDatasetPage.py | 9 ++ tests_end_to_end/page_objects/TracesPage.py | 1 - .../page_objects/TracesPageSpansMenu.py | 23 +++- 8 files changed, 216 insertions(+), 32 deletions(-) create mode 100644 tests_end_to_end/page_objects/DatasetsPage.py create mode 100644 tests_end_to_end/page_objects/ExperimentsPage.py create mode 100644 tests_end_to_end/page_objects/IndividualDatasetPage.py diff --git a/.github/workflows/sanity.yml b/.github/workflows/sanity.yml index aaf0517947..5a1700bfdc 100644 --- a/.github/workflows/sanity.yml +++ b/.github/workflows/sanity.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: jobs: - test_installation: + e2e_sanity: runs-on: ubuntu-20.04 steps: diff --git a/tests_end_to_end/application_sanity/conftest.py b/tests_end_to_end/application_sanity/conftest.py index 6ef87c9133..815327580c 100644 --- a/tests_end_to_end/application_sanity/conftest.py +++ b/tests_end_to_end/application_sanity/conftest.py @@ -2,10 +2,17 @@ import os import opik import yaml +import json from opik.configurator.configure import configure from opik.evaluation import evaluate from opik.evaluation.metrics import Contains, Equals from opik import opik_context, track, DatasetItem +from 
playwright.sync_api import Page + +from page_objects.ProjectsPage import ProjectsPage +from page_objects.TracesPage import TracesPage +from page_objects.DatasetsPage import DatasetsPage +from page_objects.ExperimentsPage import ExperimentsPage @pytest.fixture(scope='session', autouse=True) @@ -30,6 +37,42 @@ def client(config): return opik.Opik(project_name=config['project']['name'], host='http://localhost:5173/api') +@pytest.fixture(scope='function') +def projects_page(page: Page): + projects_page = ProjectsPage(page) + projects_page.go_to_page() + return projects_page + + +@pytest.fixture(scope='function') +def projects_page_timeout(page: Page): + projects_page = ProjectsPage(page) + projects_page.go_to_page() + projects_page.page.wait_for_timeout(7000) + return projects_page + + +@pytest.fixture(scope='function') +def traces_page(page: Page, projects_page, config): + projects_page.click_project(config['project']['name']) + traces_page = TracesPage(page) + return traces_page + + +@pytest.fixture(scope='function') +def datasets_page(page: Page): + datasets_page = DatasetsPage(page) + datasets_page.go_to_page() + return datasets_page + + +@pytest.fixture(scope='function') +def experiments_page(page: Page): + experiments_page = ExperimentsPage(page) + experiments_page.go_to_page() + return experiments_page + + @pytest.fixture(scope='module') def log_traces_and_spans_low_level(client, config): """ @@ -126,7 +169,7 @@ def make_trace(x): make_trace(x) -@pytest.fixture(scope='function') +@pytest.fixture(scope='module') def dataset(config, client): dataset_config = { 'name': config['dataset']['name'], @@ -141,7 +184,7 @@ def dataset(config, client): return dataset -@pytest.fixture(scope='function') +@pytest.fixture(scope='module') def create_experiments(config, dataset): exp_config = { 'prefix': config['experiments']['prefix'], @@ -186,3 +229,14 @@ def eval_equals(x: DatasetItem): scoring_metrics=[equals_metric] ) + +@pytest.fixture(scope='function') +def 
dataset_content(config): + curr_dir = os.path.dirname(__file__) + dataset_filepath = os.path.join(curr_dir, config['dataset']['filename']) + + data = [] + with open(dataset_filepath, 'r') as f: + for line in f: + data.append(json.loads(line)) + return data \ No newline at end of file diff --git a/tests_end_to_end/application_sanity/test_sanity.py b/tests_end_to_end/application_sanity/test_sanity.py index d940228f13..3b3badbd2f 100644 --- a/tests_end_to_end/application_sanity/test_sanity.py +++ b/tests_end_to_end/application_sanity/test_sanity.py @@ -1,28 +1,31 @@ import pytest +import json + from playwright.sync_api import Page, expect -from page_objects.ProjectsPage import ProjectsPage -from page_objects.TracesPage import TracesPage + from page_objects.TracesPageSpansMenu import TracesPageSpansMenu +from page_objects.IndividualDatasetPage import IndividualDatasetPage + +def test_project_name(projects_page_timeout, log_traces_and_spans_decorator, log_traces_and_spans_low_level): + ''' + Checks that the project created via the fixtures exists + Does a timeout of 7 seconds to wait for the traces to show up in UI for later tests (TODO: figure out a better way to do this) -def test_project_name(page: Page, log_traces_and_spans_decorator, log_traces_and_spans_low_level): - projects_page = ProjectsPage(page) - projects_page.go_to_page() - projects_page.check_project_exists('test-project') + 1. Open projects page + 2. 
Check the created project exists + ''' + projects_page_timeout.check_project_exists('test-project') -def test_traces_created(page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator): - #navigate to project - projects_page = ProjectsPage(page) - projects_page.go_to_page() - - #wait for data to actually arrive to the frontend - #TODO: replace this with a smarter waiting mechanism - page.wait_for_timeout(5000) - projects_page.click_project(config['project']['name']) +def test_traces_created(traces_page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator): + ''' + Checks that every trace defined in the sanity_config file is present in the project - #grab all traces of project - traces_page = TracesPage(page) + 1. Open the traces page of the project + 2. Grab all the names of the traces (should never set more than 15 in config so 1 page is safe) + 3. Check that every possible name of the traces as defined in sanity_config.yaml is present in the names list + ''' trace_names = traces_page.get_all_trace_names() client_prefix = config['traces']['client']['prefix'] @@ -33,16 +36,13 @@ def test_traces_created(page, config, log_traces_and_spans_low_level, log_traces assert prefix+str(count) in trace_names -def test_spans_of_traces(page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator): - projects_page = ProjectsPage(page) - projects_page.go_to_page() - - #wait for data to actually arrive to the frontend - #TODO: replace this with a smarter waiting mechanism - projects_page.click_project(config['project']['name']) - - #grab all traces of project - traces_page = TracesPage(page) +def test_spans_of_traces(page, traces_page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator): + ''' + Checks that every trace has the correct number and names of spans defined in the sanity_config.yaml file + 1. Open the traces page of the project + 2. Go through each trace and click it + 3. 
Check that the spans are present in each trace + ''' trace_names = traces_page.get_all_trace_names() for trace in trace_names: @@ -54,3 +54,75 @@ def test_spans_of_traces(page, config, log_traces_and_spans_low_level, log_trace spans_menu.check_span_exists_by_name(f'{prefix}{count}') +def test_trace_and_span_details(page, traces_page, config, log_traces_and_spans_low_level, log_traces_and_spans_decorator): + ''' + Checks that for each trace and spans, the attributes defined in sanity_config.yaml are present + 1. Go through each trace of the project + 2. Check the created tags are present + 3. Check the created feedback scores are present + 4. Check the defined metadata is present + 5. Go through each span of the traces and repeat 2-4 + ''' + trace_names = traces_page.get_all_trace_names() + + for trace in trace_names: + page.get_by_text(trace).click() + spans_menu = TracesPageSpansMenu(page) + trace_type = trace.split('-')[0] + tag_names = config['traces'][trace_type]['tags'] + + for tag in tag_names: + spans_menu.check_tag_exists_by_name(tag) + + spans_menu.get_feedback_scores_tab().click() + + for score in config['traces'][trace_type]['feedback-scores']: + expect(page.get_by_role('cell', name=score, exact=True)).to_be_visible() + expect(page.get_by_role('cell', name=str(config['traces'][trace_type]['feedback-scores'][score]), exact=True)).to_be_visible() + + spans_menu.get_metadata_tab().click() + for md_key in config['traces'][trace_type]['metadata']: + expect(page.get_by_text(f'{md_key}: {config['traces'][trace_type]['metadata'][md_key]}')).to_be_visible() + + for count in range(config['spans']['count']): + prefix = config['spans'][trace_type]['prefix'] + spans_menu.get_first_span_by_name(f'{prefix}{count}').click() + + spans_menu.get_feedback_scores_tab().click() + for score in config['spans'][trace_type]['feedback-scores']: + expect(page.get_by_role('cell', name=score, exact=True)).to_be_visible() + expect(page.get_by_role('cell', 
name=str(config['spans'][trace_type]['feedback-scores'][score]), exact=True)).to_be_visible() + + spans_menu.get_metadata_tab().click() + for md_key in config['spans'][trace_type]['metadata']: + expect(page.get_by_text(f'{md_key}: {config['spans'][trace_type]['metadata'][md_key]}')).to_be_visible() + + +def test_dataset_name(datasets_page, config, dataset): + ''' + Checks that the dataset created via the fixture as defined in sanity_config.yaml is present on the datasets page + ''' + datasets_page.check_dataset_exists_by_name(config['dataset']['name']) + + +def test_dataset_items(page: Page, datasets_page, config, dataset_content): + ''' + Checks that the items created via the fixture and defined in sanity_dataset.jsonl are present within the dataset + ''' + datasets_page.select_database_by_name(config['dataset']['name']) + + individual_dataset_page = IndividualDatasetPage(page) + for item in dataset_content: + individual_dataset_page.check_cell_exists_by_text(json.dumps(item['input']).replace('{', '{ ').replace('}', ' }')) + individual_dataset_page.check_cell_exists_by_text(json.dumps(item['expected_output']).replace('{', '{ ').replace('}', ' }')) + + +def test_experiments_exist(experiments_page, config, create_experiments): + ''' + Checks that the experiments created via the fixture are present and have the correct values for the metrics (experiments defined in a way to always return the same results) + ''' + experiments_page.check_experiment_exists_by_name('test-experiment-Equals') + expect(experiments_page.page.get_by_text('Equals0').first).to_be_visible() + + experiments_page.check_experiment_exists_by_name('test-experiment-Contains') + expect(experiments_page.page.get_by_text('Contains1').first).to_be_visible() diff --git a/tests_end_to_end/page_objects/DatasetsPage.py b/tests_end_to_end/page_objects/DatasetsPage.py new file mode 100644 index 0000000000..d0509efba4 --- /dev/null +++ b/tests_end_to_end/page_objects/DatasetsPage.py @@ -0,0 +1,17 @@ +from 
playwright.sync_api import Page, expect + +class DatasetsPage: + def __init__(self, page: Page): + self.page = page + self.url = '/default/datasets' + + def go_to_page(self): + self.page.goto(self.url) + + def select_database_by_name(self, name): + self.page.get_by_text(name, exact=True).first.click() + + def check_dataset_exists_by_name(self, dataset_name): + expect(self.page.get_by_text(dataset_name)).to_be_visible() + + \ No newline at end of file diff --git a/tests_end_to_end/page_objects/ExperimentsPage.py b/tests_end_to_end/page_objects/ExperimentsPage.py new file mode 100644 index 0000000000..0903206b66 --- /dev/null +++ b/tests_end_to_end/page_objects/ExperimentsPage.py @@ -0,0 +1,12 @@ +from playwright.sync_api import Page, expect + +class ExperimentsPage: + def __init__(self, page: Page): + self.page = page + self.url = '/default/experiments' + + def go_to_page(self): + self.page.goto(self.url) + + def check_experiment_exists_by_name(self, name): + expect(self.page.get_by_text(name).first).to_be_visible() \ No newline at end of file diff --git a/tests_end_to_end/page_objects/IndividualDatasetPage.py b/tests_end_to_end/page_objects/IndividualDatasetPage.py new file mode 100644 index 0000000000..00c3084433 --- /dev/null +++ b/tests_end_to_end/page_objects/IndividualDatasetPage.py @@ -0,0 +1,9 @@ +from playwright.sync_api import Page, expect + +class IndividualDatasetPage: + def __init__(self, page: Page): + self.page = page + self.traces_table = page.get_by_role('table') + + def check_cell_exists_by_text(self, text): + expect(self.traces_table.get_by_text(text)).to_be_visible() \ No newline at end of file diff --git a/tests_end_to_end/page_objects/TracesPage.py b/tests_end_to_end/page_objects/TracesPage.py index 2df04eab05..af9f784e10 100644 --- a/tests_end_to_end/page_objects/TracesPage.py +++ b/tests_end_to_end/page_objects/TracesPage.py @@ -8,6 +8,5 @@ def __init__(self, page: Page): def get_all_trace_names(self): 
self.page.wait_for_selector(self.trace_names_selector) - names = self.page.locator(self.trace_names_selector).all_inner_texts() return names \ No newline at end of file diff --git a/tests_end_to_end/page_objects/TracesPageSpansMenu.py b/tests_end_to_end/page_objects/TracesPageSpansMenu.py index a0a495ebc4..853e663230 100644 --- a/tests_end_to_end/page_objects/TracesPageSpansMenu.py +++ b/tests_end_to_end/page_objects/TracesPageSpansMenu.py @@ -3,6 +3,27 @@ class TracesPageSpansMenu: def __init__(self, page: Page): self.page = page + self.input_output_tab = 'Input/Output' + self.feedback_scores_tab = 'Feedback scores' + self.metadata_tab = 'Metadata' + + def get_first_trace_by_name(self, name): + return self.page.get_by_role('button', name=name).first + + def get_first_span_by_name(self, name): + return self.page.get_by_role('button', name=name).first def check_span_exists_by_name(self, name): - expect(self.page.get_by_role('button', name=name)).to_be_visible() \ No newline at end of file + expect(self.page.get_by_role('button', name=name)).to_be_visible() + + def check_tag_exists_by_name(self, tag_name): + expect(self.page.get_by_text(tag_name)).to_be_visible() + + def get_input_output_tab(self): + return self.page.get_by_role('tab', name=self.input_output_tab) + + def get_feedback_scores_tab(self): + return self.page.get_by_role('tab', name=self.feedback_scores_tab) + + def get_metadata_tab(self): + return self.page.get_by_role('tab', name='Metadata') \ No newline at end of file