Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

All expectations fail if one fails with 'unrecognized condition_parser None for Spark execution engine' #10709

Open
iamamutt opened this issue Nov 26, 2024 · 0 comments

Comments

@iamamutt
Copy link

Describe the bug
Omitting `condition_parser` on a single expectation that uses `row_condition` causes every expectation in the suite to fail with exception info — not just the misconfigured one.

To Reproduce

import tempfile

from pprint import pprint

import great_expectations as gx
import pandas as pd

# Sample dataset for the reproduction: col2 and col3 each contain one null
# so that the row_condition ("col3 IS NOT NULL") and null checks are exercised.
data = {
    "col1": [1, 2, 3, 4, 5],
    "col2": ["A", "B", "C", "D", None],
    "col3": [1.1, None, 3.3, 4.4, 5.5],
}


def validate(dir_path: str, file_name: str):
    """Build an ephemeral GX context, register a two-expectation suite and a
    Spark filesystem CSV asset, then run the validation and return its result.

    The second expectation deliberately sets ``row_condition`` WITHOUT a
    ``condition_parser`` (see the commented-out line) to reproduce the
    reported failure.
    """
    context = gx.get_context(mode='ephemeral')

    # Suite: one null check plus one set-membership check with a row condition.
    expectation_list = [
        gx.expectations.ExpectColumnValuesToNotBeNull(
            column='col1', result_format='COMPLETE'
        ),
        gx.expectations.ExpectColumnValuesToBeInSet(
            column='col2',
            value_set=['A', 'B', 'C'],
            row_condition='col3 IS NOT NULL',
            mostly=0.665,
            # condition_parser='spark',
            result_format='COMPLETE',
        ),
    ]
    suite = context.suites.add(
        gx.ExpectationSuite(name='test-suite', expectations=expectation_list)
    )

    # Data source -> CSV asset -> batch definition, built step by step.
    spark_source = context.data_sources.add_spark_filesystem(
        name='test-spark-fs',
        base_directory=dir_path,
    )
    csv_asset = spark_source.add_csv_asset(
        name='csv-asset',
        sep=',',
        header=True,
        infer_schema=True,
    )
    batch_definition = csv_asset.add_batch_definition_path(
        name='test-data',
        path=file_name,
    )

    return gx.ValidationDefinition(
        name='test-validation',
        data=batch_definition,
        suite=suite,
    ).run()


# Write the sample data to a temporary CSV, run the validation against it,
# and pretty-print the full JSON result (the directory is removed on exit).
with tempfile.TemporaryDirectory() as dir_path:
    file_name = 'data.csv'
    frame = pd.DataFrame(data)
    frame.to_csv(f'{dir_path}/{file_name}', index=False)
    validation_result = validate(dir_path, file_name)
    pprint(validation_result.to_json_dict(), sort_dicts=False, width=100)

Expected behavior
Only the expectation with the configuration error should fail and show exception info; the other expectations in the suite should still be evaluated normally.

Environment (please complete the following information):

  • Operating System: Linux
  • Great Expectations Version: 1.2.4
  • Data Source: Spark file

Additional context

{
  "success": false,
  "results": [
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_not_be_null",
        "kwargs": {
          "result_format": "COMPLETE",
          "column": "col1",
          "batch_id": "test-spark-fs-csv-asset"
        },
        "meta": {},
        "id": "0e734de0-e872-43e7-adc7-07c868c689d7"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "('table.row_count', '0dfa72ce94f9f181a7dc04305a6c30f7', ())": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n    self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n    df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n    raise GreatExpectationsError(  # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n    self._execution_engine.resolve_metrics(\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n    return self._process_direct_and_bundled_metric_computation_configurations(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n    raise 
gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
          "exception_message": "unrecognized condition_parser None for Spark execution engine",
          "raised_exception": true
        }
      }
    },
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_be_in_set",
        "kwargs": {
          "result_format": "COMPLETE",
          "column": "col2",
          "mostly": 0.665,
          "row_condition": "col3 IS NOT NULL",
          "value_set": [
            "A",
            "B",
            "C"
          ],
          "batch_id": "test-spark-fs-csv-asset"
        },
        "meta": {},
        "id": "c648e872-154c-4374-9cf1-cb8751e1c6d2"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "('table.column_types', 'e48bc318d7e9c92e270e3f7ab807c1b8', 'include_nested=True')": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 532, in _process_direct_and_bundled_metric_computation_configurations\n    metric_computation_configuration.metric_fn(  # type: ignore[misc] # F not callable\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/metric_provider.py\", line 60, in inner_func\n    return metric_fn(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/table_metrics/table_column_types.py\", line 81, in _spark\n    df, _, _ = execution_engine.get_compute_domain(\n               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 800, in get_compute_domain\n    data: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n    raise GreatExpectationsError(  # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n    self._execution_engine.resolve_metrics(\n  File 
\"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n    return self._process_direct_and_bundled_metric_computation_configurations(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 537, in _process_direct_and_bundled_metric_computation_configurations\n    raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
          "exception_message": "unrecognized condition_parser None for Spark execution engine",
          "raised_exception": true
        },
        "('table.row_count', 'e48bc318d7e9c92e270e3f7ab807c1b8', ())": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n    self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n    df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n    raise GreatExpectationsError(  # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n    self._execution_engine.resolve_metrics(\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n    return self._process_direct_and_bundled_metric_computation_configurations(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n    raise 
gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
          "exception_message": "unrecognized condition_parser None for Spark execution engine",
          "raised_exception": true
        }
      }
    }
  ],
  "suite_name": "test-suite",
  "suite_parameters": {},
  "statistics": {
    "evaluated_expectations": 2,
    "successful_expectations": 0,
    "unsuccessful_expectations": 2,
    "success_percent": 0.0
  },
  "meta": {
    "great_expectations_version": "1.2.4",
    "batch_spec": {
      "path": "/tmp/tmpgf8032g5/data.csv",
      "reader_method": "csv",
      "reader_options": {
        "sep": ",",
        "header": true,
        "inferSchema": true
      }
    },
    "batch_markers": {
      "ge_load_time": "20241126T222652.157081Z"
    },
    "active_batch_definition": {
      "datasource_name": "test-spark-fs",
      "data_connector_name": "fluent",
      "data_asset_name": "csv-asset",
      "batch_identifiers": {
        "path": "data.csv"
      },
      "batching_regex": "(?P<path>data.csv)"
    },
    "validation_id": "cd188ea7-bedf-4f8a-9898-1cf823b69b5f",
    "checkpoint_id": null,
    "batch_parameters": null
  },
  "id": null
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
Status: To Do
Development

No branches or pull requests

1 participant