diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_container.py b/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_container.py deleted file mode 100644 index cd81c99c49..0000000000 --- a/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_container.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Generic, List, TypeVar - -T = TypeVar("T") - - -class SemanticModelContainer(ABC, Generic[T]): # noqa: D101 - @abstractmethod - def get(self, semantic_model_name: str) -> T: # noqa: D102 - pass - - @abstractmethod - def values(self) -> List[T]: # noqa: D102 - pass - - @abstractmethod - def keys(self) -> List[str]: # noqa: D102 - pass - - @abstractmethod - def __contains__(self, item: str) -> bool: # noqa: D105 - pass - - @abstractmethod - def put(self, key: str, value: T) -> None: # noqa: D102 - pass diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py index 057bc349a7..4dc45ea70e 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/semantic_model_lookup.py @@ -3,9 +3,7 @@ import logging from typing import Dict, List, Optional, Sequence, Set -from dbt_semantic_interfaces.protocols.dimension import Dimension from dbt_semantic_interfaces.protocols.entity import Entity -from dbt_semantic_interfaces.protocols.measure import Measure from dbt_semantic_interfaces.protocols.semantic_manifest import SemanticManifest from dbt_semantic_interfaces.protocols.semantic_model import SemanticModel from dbt_semantic_interfaces.references import ( @@ -16,14 +14,13 @@ SemanticModelReference, TimeDimensionReference, ) -from dbt_semantic_interfaces.type_enums import AggregationType, DimensionType, 
TimeGranularity +from dbt_semantic_interfaces.type_enums import DimensionType from metricflow_semantics.errors.error_classes import InvalidSemanticModelError from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat from metricflow_semantics.model.semantics.dimension_lookup import DimensionLookup from metricflow_semantics.model.semantics.element_group import ElementGrouper from metricflow_semantics.model.semantics.measure_lookup import MeasureLookup -from metricflow_semantics.model.semantics.semantic_model_helper import SemanticModelHelper from metricflow_semantics.model.spec_converters import MeasureConverter from metricflow_semantics.naming.linkable_spec_name import StructuredLinkableSpecName from metricflow_semantics.specs.dimension_spec import DimensionSpec @@ -31,7 +28,7 @@ from metricflow_semantics.specs.instance_spec import LinkableInstanceSpec from metricflow_semantics.specs.measure_spec import MeasureSpec from metricflow_semantics.specs.non_additive_dimension_spec import NonAdditiveDimensionSpec -from metricflow_semantics.specs.time_dimension_spec import DEFAULT_TIME_GRANULARITY, TimeDimensionSpec +from metricflow_semantics.specs.time_dimension_spec import TimeDimensionSpec from metricflow_semantics.time.granularity import ExpandedTimeGranularity logger = logging.getLogger(__name__) @@ -48,8 +45,6 @@ def __init__(self, model: SemanticManifest, custom_granularities: Dict[str, Expa """ self._custom_granularities = custom_granularities self._measure_index: Dict[MeasureReference, SemanticModel] = {} - self._measure_aggs: Dict[MeasureReference, AggregationType] = {} - self._measure_agg_time_dimension: Dict[MeasureReference, TimeDimensionReference] = {} self._measure_non_additive_dimension_specs: Dict[MeasureReference, NonAdditiveDimensionSpec] = {} self._dimension_index: Dict[DimensionReference, List[SemanticModel]] = {} self._entity_index: Dict[EntityReference, List[SemanticModel]] = {} @@ -66,12 +61,6 @@ def __init__(self, model: 
SemanticManifest, custom_granularities: Dict[str, Expa for semantic_model in sorted_semantic_models: self._add_semantic_model(semantic_model) - # Cache for defined time granularity. - self._time_dimension_to_defined_time_granularity: Dict[TimeDimensionReference, TimeGranularity] = {} - - # Cache for agg. time dimension for measure. - self._measure_reference_to_agg_time_dimension_specs: Dict[MeasureReference, Sequence[TimeDimensionSpec]] = {} - self._measure_lookup = MeasureLookup(sorted_semantic_models, custom_granularities) self._dimension_lookup = DimensionLookup(sorted_semantic_models) @@ -79,27 +68,6 @@ def get_dimension_references(self) -> Sequence[DimensionReference]: """Retrieve all dimension references from the collection of semantic models.""" return tuple(self._dimension_index.keys()) - def get_dimension(self, dimension_reference: DimensionReference) -> Dimension: - """Retrieves a full dimension object by name.""" - # If the reference passed is a TimeDimensionReference, convert to DimensionReference. - dimension_reference = DimensionReference(dimension_reference.element_name) - - semantic_models = self._dimension_index.get(dimension_reference) - if not semantic_models: - raise ValueError( - f"Could not find dimension with name '{dimension_reference.element_name}' in configured semantic models" - ) - - return SemanticModelHelper.get_dimension_from_semantic_model( - # Dimension object should match across semantic models, so just use the first semantic model. 
- semantic_model=semantic_models[0], - dimension_reference=dimension_reference, - ) - - def get_time_dimension(self, time_dimension_reference: TimeDimensionReference) -> Dimension: - """Retrieves a full dimension object by name.""" - return self.get_dimension(dimension_reference=time_dimension_reference.dimension_reference) - @property def measure_references(self) -> Sequence[MeasureReference]: """Return all measure references from the collection of semantic models.""" @@ -113,35 +81,10 @@ def non_additive_dimension_specs_by_measure(self) -> Dict[MeasureReference, NonA """ return self._measure_non_additive_dimension_specs - def get_measure(self, measure_reference: MeasureReference) -> Measure: - """Retrieve the measure model object associated with the measure reference.""" - if measure_reference not in self._measure_index: - raise ValueError(f"Could not find measure with name ({measure_reference}) in configured semantic models") - - return SemanticModelHelper.get_measure_from_semantic_model( - semantic_model=self.get_semantic_model_for_measure(measure_reference), measure_reference=measure_reference - ) - def get_entity_references(self) -> Sequence[EntityReference]: """Retrieve all entity references from the collection of semantic models.""" return list(self._entity_index.keys()) - def get_semantic_model_for_measure(self, measure_reference: MeasureReference) -> SemanticModel: # noqa: D102 - semantic_model = self._measure_index.get(measure_reference) - assert semantic_model, ( - f"Semantic model not found for measure: {repr(measure_reference)}. " - f"This indicates either internal misconfiguration or that the measure does not exist." - ) - return semantic_model - - def get_agg_time_dimension_for_measure(self, measure_reference: MeasureReference) -> TimeDimensionReference: - """Retrieves the aggregate time dimension that is associated with the measure reference. 
- - This is the time dimension along which the measure will be aggregated when a metric built on this measure - is queried with metric_time. - """ - return self._measure_agg_time_dimension[measure_reference] - def get_entity_in_semantic_model(self, ref: SemanticModelElementReference) -> Optional[Entity]: """Retrieve the entity matching the element -> semantic model mapping, if any.""" semantic_model = self.get_by_reference(ref.semantic_model_reference) @@ -165,13 +108,6 @@ def _add_semantic_model(self, semantic_model: SemanticModel) -> None: if semantic_model.reference in self._semantic_model_reference_to_semantic_model: errors.append(f"Semantic model {semantic_model.reference} already added.") - for measure in semantic_model.measures: - if measure.reference in self._measure_aggs and self._measure_aggs[measure.reference] != measure.agg: - errors.append( - f"Conflicting aggregation (agg) for measure {measure.reference}. Currently registered as " - f"{self._measure_aggs[measure.reference]} but got {measure.agg}." - ) - if len(errors) > 0: raise InvalidSemanticModelError(f"Error adding {semantic_model.reference}. 
Got errors: {errors}") @@ -180,7 +116,6 @@ def _add_semantic_model(self, semantic_model: SemanticModel) -> None: ]() for measure in semantic_model.measures: - self._measure_aggs[measure.reference] = measure.agg self._measure_index[measure.reference] = semantic_model agg_time_dimension_reference = semantic_model.checked_agg_time_dimension_for_measure(measure.reference) @@ -209,9 +144,6 @@ def _add_semantic_model(self, semantic_model: SemanticModel) -> None: ), value=MeasureConverter.convert_to_measure_spec(measure=measure), ) - self._measure_agg_time_dimension[measure.reference] = TimeDimensionReference( - element_name=agg_time_dimension.name - ) if measure.non_additive_dimension: non_additive_dimension_spec = NonAdditiveDimensionSpec( @@ -276,52 +208,6 @@ def get_element_spec_for_name(self, element_name: str) -> LinkableInstanceSpec: else: raise ValueError(f"Unable to find linkable element {element_name} in manifest") - def get_agg_time_dimension_specs_for_measure( - self, measure_reference: MeasureReference - ) -> Sequence[TimeDimensionSpec]: - """Get the agg time dimension specs that can be used in place of metric time for this measure.""" - result = self._measure_reference_to_agg_time_dimension_specs.get(measure_reference) - if result is not None: - return result - - result = self._get_agg_time_dimension_specs_for_measure(measure_reference) - self._measure_reference_to_agg_time_dimension_specs[measure_reference] = result - return result - - def _get_agg_time_dimension_specs_for_measure( - self, measure_reference: MeasureReference - ) -> Sequence[TimeDimensionSpec]: - agg_time_dimension = self.get_agg_time_dimension_for_measure(measure_reference) - # A measure's agg_time_dimension is required to be in the same semantic model as the measure, - # so we can assume the same semantic model for both measure and dimension. 
- semantic_model = self.get_semantic_model_for_measure(measure_reference) - entity_link = SemanticModelHelper.resolved_primary_entity(semantic_model) - return TimeDimensionSpec.generate_possible_specs_for_time_dimension( - time_dimension_reference=agg_time_dimension, - entity_links=(entity_link,), - custom_granularities=self._custom_granularities, - ) - - def get_defined_time_granularity(self, time_dimension_reference: TimeDimensionReference) -> TimeGranularity: - """Time granularity from the time dimension's YAML definition. If not set, defaults to DAY.""" - result = self._time_dimension_to_defined_time_granularity.get(time_dimension_reference) - - if result is not None: - return result - - result = self._get_defined_time_granularity(time_dimension_reference) - self._time_dimension_to_defined_time_granularity[time_dimension_reference] = result - return result - - def _get_defined_time_granularity(self, time_dimension_reference: TimeDimensionReference) -> TimeGranularity: - time_dimension = self.get_dimension(time_dimension_reference) - - defined_time_granularity = DEFAULT_TIME_GRANULARITY - if time_dimension.type_params and time_dimension.type_params.time_granularity: - defined_time_granularity = time_dimension.type_params.time_granularity - - return defined_time_granularity - @property def measure_lookup(self) -> MeasureLookup: # noqa: D102 return self._measure_lookup diff --git a/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py b/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py index 907c5d3bb9..df753ceafa 100644 --- a/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py +++ b/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py @@ -77,28 +77,6 @@ def test_get_names( # noqa: D103 ) -def test_get_elements(semantic_model_lookup: SemanticModelLookup) -> None: # noqa: D103 - for dimension_reference in 
semantic_model_lookup.get_dimension_references(): - assert ( - semantic_model_lookup.get_dimension(dimension_reference=dimension_reference).reference - == dimension_reference - ) - for measure_reference in semantic_model_lookup.measure_references: - measure_reference = MeasureReference(element_name=measure_reference.element_name) - assert semantic_model_lookup.get_measure(measure_reference=measure_reference).reference == measure_reference - - -def test_get_semantic_model_for_measure(semantic_model_lookup: SemanticModelLookup) -> None: # noqa: D103 - bookings_source = semantic_model_lookup.get_semantic_model_for_measure(MeasureReference(element_name="bookings")) - assert bookings_source.name == "bookings_source" - - views_source = semantic_model_lookup.get_semantic_model_for_measure(MeasureReference(element_name="views")) - assert views_source.name == "views_source" - - listings_source = semantic_model_lookup.get_semantic_model_for_measure(MeasureReference(element_name="listings")) - assert listings_source.name == "listings_latest" - - def test_local_linked_elements_for_metric( # noqa: D103 request: FixtureRequest, mf_test_configuration: MetricFlowTestConfiguration, metric_lookup: MetricLookup ) -> None: @@ -239,12 +217,3 @@ def test_get_valid_agg_time_dimensions_for_metric( # noqa: D103 assert metric_agg_time_dim.reference == measure_agg_time_dims[0] else: assert len(metric_agg_time_dims) == 0 - - -def test_get_agg_time_dimension_specs_for_measure(semantic_model_lookup: SemanticModelLookup) -> None: # noqa: D103 - for measure_name in ["bookings", "views", "listings"]: - measure_reference = MeasureReference(measure_name) - agg_time_dim_specs = semantic_model_lookup.get_agg_time_dimension_specs_for_measure(measure_reference) - agg_time_dim_reference = semantic_model_lookup.get_agg_time_dimension_for_measure(measure_reference) - for spec in agg_time_dim_specs: - assert spec.reference == agg_time_dim_reference diff --git a/tests_metricflow/performance/__init__.py 
class CategoricalDimensionGenerator:
    """Helps generate the categorical dimensions in the semantic manifest.

    The index for the dimension refers to the index when all unique dimensions in the semantic manifest are enumerated.
    """

    def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None:
        """Store the parameter set that determines how many unique dimensions exist."""
        self._parameter_set = parameter_set

    def get_dimension_name(self, dimension_index: int) -> str:
        """Return the name of the dimension for the given index.

        The index is zero-padded to at least 3 digits, e.g. index 7 -> `dimension_007`.
        """
        return f"dimension_{dimension_index:03}"

    @cached_property
    def unique_dimension_count(self) -> int:
        """Return the number of unique categorical dimensions in the manifest.

        This is the number of categorical dimensions per semantic model multiplied by the number of semantic models
        that contain dimensions. The value is computed on first access and cached on the instance.
        """
        return (
            self._parameter_set.categorical_dimensions_per_semantic_model
            * self._parameter_set.dimension_semantic_model_count
        )

    def get_next_wrapped_index(self, dimension_index: int) -> int:
        """Return the next valid dimension index, wrapping back to 0 if it reaches the last index.

        Args:
            dimension_index: The current index. Must satisfy `0 <= dimension_index < unique_dimension_count`.

        Returns:
            `dimension_index + 1`, or 0 if `dimension_index` is the last valid index.

        Raises:
            ValueError: If `dimension_index` is outside the valid range.
        """
        if dimension_index < 0:
            # 0 is a valid index, so the lower bound reported to the caller is inclusive.
            raise ValueError(f"{dimension_index=} should be >= 0")

        if dimension_index >= self.unique_dimension_count:
            raise ValueError(f"{dimension_index=} should be < {self.unique_dimension_count}")

        return (dimension_index + 1) % self.unique_dimension_count
class DimensionSemanticModelGenerator:
    """Helps generate a semantic model containing dimensions.

    Each of the generated semantic models contain an entity common to the semantic models containing measures so that
    any measure can be queried by any dimension.
    """

    def __init__(
        self,
        parameter_set: SyntheticManifestParameterSet,
        categorical_dimension_generator: CategoricalDimensionGenerator,
    ) -> None:
        """Store the parameter set and the generator that supplies dimension names."""
        self._parameter_set = parameter_set
        self._dimension_generator = categorical_dimension_generator

    def generate_semantic_models(self) -> Sequence[PydanticSemanticModel]:
        """Generate one semantic model per `dimension_semantic_model_count` in the parameter set.

        Each generated model has:
        * A primary entity whose name is derived from the model name.
        * A unique entity named after the parameter set's common entity.
        * `categorical_dimensions_per_semantic_model` categorical dimensions.
        """
        semantic_models = []
        for semantic_model_index in range(self._parameter_set.dimension_semantic_model_count):
            entities = [
                PydanticEntity(
                    name=self._get_dimension_semantic_model_primary_entity_name(semantic_model_index),
                    type=EntityType.PRIMARY,
                ),
                PydanticEntity(
                    name=self._parameter_set.common_entity_name,
                    type=EntityType.UNIQUE,
                ),
            ]

            dimensions = [
                PydanticDimension(
                    name=self._get_dimension_name(
                        index_in_manifest=semantic_model_index,
                        index_in_model=dimension_index,
                    ),
                    type=DimensionType.CATEGORICAL,
                )
                for dimension_index in range(self._parameter_set.categorical_dimensions_per_semantic_model)
            ]

            semantic_model_name = self._get_dimension_semantic_model_name(semantic_model_index)
            semantic_models.append(
                PydanticSemanticModel(
                    name=semantic_model_name,
                    node_relation=PydanticNodeRelation(
                        schema_name="demo",
                        # The table alias is the same as the semantic model name.
                        alias=semantic_model_name,
                    ),
                    entities=entities,
                    dimensions=dimensions,
                )
            )

        return semantic_models

    def _get_dimension_semantic_model_name(self, index_in_manifest: int) -> str:
        """Return the name of the dimension semantic model at the given index, e.g. `dimension_model_002`."""
        return f"dimension_model_{index_in_manifest:03}"

    def _get_dimension_semantic_model_primary_entity_name(self, semantic_model_index: int) -> str:
        """Return the primary entity name for the model, i.e. the model name suffixed with `_primary_entity`."""
        return f"{self._get_dimension_semantic_model_name(semantic_model_index)}_primary_entity"

    def _get_dimension_name(self, index_in_manifest: int, index_in_model: int) -> str:
        """Get the name of the dimension given the index.

        Args:
            index_in_manifest: The index of the semantic model in the manifest. e.g. the 2nd semantic model in the
            semantic manifest.
            index_in_model: The index of the dimension in the semantic model. e.g. the 2nd dimension in the semantic
            model.

        Returns:
            The name of the dimension given the index.
        """
        # Flatten (model index, index within model) into a single manifest-wide dimension index.
        return self._dimension_generator.get_dimension_name(
            index_in_manifest * self._parameter_set.categorical_dimensions_per_semantic_model + index_in_model
        )


class MeasureGenerator:
    """Helps generate the measures in the semantic manifest.

    The index for the measure refers to the index when measures in the semantic manifest are enumerated.
    """

    def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None:
        """Store the parameter set that determines how many unique measures exist."""
        self._parameter_set = parameter_set

    def get_measure_name(self, measure_index: int) -> str:
        """Return the name of the measure for the given index, e.g. index 4 -> `measure_004`."""
        return f"measure_{measure_index:03}"

    @property
    def unique_measure_count(self) -> int:
        """Return the number of measures per semantic model multiplied by the number of measure semantic models."""
        return self._parameter_set.measures_per_semantic_model * self._parameter_set.measure_semantic_model_count

    def get_next_wrapped_index(self, measure_index: int) -> int:
        """Return the next valid measure index, wrapping back to 0 if it reaches the last index.

        Args:
            measure_index: The current index. Must satisfy `0 <= measure_index < unique_measure_count`.

        Returns:
            `measure_index + 1`, or 0 if `measure_index` is the last valid index.

        Raises:
            ValueError: If `measure_index` is outside the valid range.
        """
        if measure_index < 0:
            # 0 is a valid index, so the lower bound reported to the caller is inclusive.
            raise ValueError(f"{measure_index=} should be >= 0")

        if measure_index >= self.unique_measure_count:
            raise ValueError(f"{measure_index=} should be < {self.unique_measure_count}")

        return (measure_index + 1) % self.unique_measure_count
class MeasureSemanticModelGenerator:
    """Builds the semantic models that hold measures for the synthetic manifest.

    Every generated model carries the parameter set's common entity (also present in the dimension semantic models)
    so that any measure can be queried by any dimension.
    """

    def __init__(
        self,
        parameter_set: SyntheticManifestParameterSet,
        measure_generator: MeasureGenerator,
    ) -> None:
        """Keep the parameter set and the generator that names / enumerates measures."""
        self._measure_generator = measure_generator
        self._parameter_set = parameter_set

    def _get_measure_semantic_model_name(self, semantic_model_index: int) -> str:
        """Return the model name for the given index, e.g. `measure_model_007`."""
        return f"measure_model_{semantic_model_index:03}"

    def _get_primary_entity_name_for_measure_semantic_model(self, semantic_model_index: int) -> str:
        """Return the primary entity name for the given model index, e.g. `measure_model_007_primary_entity`."""
        return f"{self._get_measure_semantic_model_name(semantic_model_index)}_primary_entity"

    def generate_semantic_models(self) -> Sequence[PydanticSemanticModel]:
        """Generate `measure_semantic_model_count` semantic models, each with `measures_per_semantic_model` measures.

        Measures are named by walking the measure generator's indexes in order (starting at 0, advancing with
        `get_next_wrapped_index`). Each model also gets a primary entity, the common entity, and a daily time
        dimension `ds` that serves as the aggregation time dimension of every measure.
        """
        measure_generator = self._measure_generator
        measures_per_model = self._parameter_set.measures_per_semantic_model
        model_count = self._parameter_set.measure_semantic_model_count

        generated_models = []
        measure_cursor = 0

        for model_index in range(model_count):
            model_measures = []
            for _ in range(measures_per_model):
                model_measures.append(
                    PydanticMeasure(
                        name=measure_generator.get_measure_name(measure_cursor),
                        agg=AggregationType.SUM,
                        agg_time_dimension="ds",
                    )
                )
                # Advance after each measure so names continue across models.
                measure_cursor = measure_generator.get_next_wrapped_index(measure_cursor)

            primary_entity = PydanticEntity(
                name=self._get_primary_entity_name_for_measure_semantic_model(model_index),
                type=EntityType.PRIMARY,
            )
            common_entity = PydanticEntity(
                name=self._parameter_set.common_entity_name,
                type=EntityType.UNIQUE,
            )
            time_dimension = PydanticDimension(
                name="ds",
                type=DimensionType.TIME,
                type_params=PydanticDimensionTypeParams(
                    time_granularity=TimeGranularity.DAY,
                ),
            )

            model_name = self._get_measure_semantic_model_name(model_index)
            generated_models.append(
                PydanticSemanticModel(
                    name=model_name,
                    node_relation=PydanticNodeRelation(
                        schema_name="demo",
                        alias=model_name,
                    ),
                    measures=model_measures,
                    entities=[primary_entity, common_entity],
                    dimensions=[time_dimension],
                )
            )

        return generated_models
@dataclass(frozen=True)
class MetricIndex:
    """Position of a generated metric within the synthetic manifest.

    Metrics may be defined in terms of other metrics. `depth_index` counts the hops from a metric down to the simple
    metrics it is ultimately built from:

    * `depth_index=0`: a simple metric that does not depend on any other metric.
    * `depth_index=1`: a derived metric defined using the metrics at `depth_index=0`.
    * `depth_index=n`: a derived metric defined using the metrics at `depth_index=n-1`.

    `width_index` enumerates the metrics generated at a given depth.
    """

    depth_index: int
    width_index: int

    def __post_init__(self) -> None:
        """Reject negative indexes."""
        if self.depth_index < 0:
            raise ValueError(f"{self.depth_index=} should be >= 0")
        if self.width_index < 0:
            raise ValueError(f"{self.width_index=} should be >=0")


class MetricGenerator:
    """Produces the metrics of the synthetic manifest."""

    def __init__(
        self, parameter_set: SyntheticManifestParameterSet, measure_generator: MeasureGenerator
    ) -> None:
        """Keep the parameter set and the generator used to name the measures behind simple metrics."""
        self._measure_generator = measure_generator
        self._parameter_set = parameter_set

    def get_metric_name(self, index: MetricIndex) -> str:
        """Return the metric name for the index, e.g. depth 1 / width 5 -> `metric_1_005`."""
        return f"metric_{index.depth_index}_{index.width_index:03}"

    def get_first_index_at_max_depth(self) -> MetricIndex:
        """Return the index of the first metric at the deepest level generated for the manifest."""
        deepest_level = self._parameter_set.max_metric_depth - 1
        return MetricIndex(depth_index=deepest_level, width_index=0)

    def get_next_wrapped_width_index(self, metric_index: MetricIndex) -> MetricIndex:
        """Return the index of the following metric at the same depth, wrapping to width 0 after the last one."""
        following_width = (metric_index.width_index + 1) % self._parameter_set.max_metric_width
        return MetricIndex(depth_index=metric_index.depth_index, width_index=following_width)

    def generate_metrics(self) -> Sequence[PydanticMetric]:
        """Generate `max_metric_width` metrics for each depth in `range(max_metric_depth)`, shallowest first."""
        return [
            self._generate_metric(metric_index)
            for depth_index in range(self._parameter_set.max_metric_depth)
            for metric_index in self._metric_indexes_at_depth(depth_index)
        ]

    def _metric_indexes_at_depth(self, depth_index: int) -> Sequence[MetricIndex]:
        """Return the indexes of all metrics at the given depth, ordered by width."""
        widths = range(self._parameter_set.max_metric_width)
        return tuple(MetricIndex(depth_index=depth_index, width_index=width_index) for width_index in widths)

    def _generate_metric(self, metric_index: MetricIndex) -> PydanticMetric:
        """Build the metric for the index: simple at depth 0, otherwise derived from every metric one level down."""
        metric_name = self.get_metric_name(metric_index)

        if metric_index.depth_index != 0:
            parent_metric_names = tuple(
                self.get_metric_name(parent_index)
                for parent_index in self._metric_indexes_at_depth(metric_index.depth_index - 1)
            )
            return PydanticMetric(
                name=metric_name,
                type=MetricType.DERIVED,
                type_params=PydanticMetricTypeParams(
                    metrics=[PydanticMetricInput(name=parent_name) for parent_name in parent_metric_names],
                    # The derived metric is the sum of all metrics at the previous depth.
                    expr=" + ".join(parent_metric_names),
                ),
            )

        # Simple metric: wrap the width index so it always maps to an existing measure.
        measure_name = self._measure_generator.get_measure_name(
            measure_index=metric_index.width_index % self._measure_generator.unique_measure_count
        )
        return PydanticMetric(
            name=metric_name,
            type=MetricType.SIMPLE,
            type_params=PydanticMetricTypeParams(measure=PydanticMetricInputMeasure(name=measure_name)),
        )


class SavedQueryGenerator:
    """Produces the saved queries of the synthetic manifest."""

    def __init__(
        self,
        parameter_set: SyntheticManifestParameterSet,
        metric_generator: MetricGenerator,
        categorical_dimension_generator: CategoricalDimensionGenerator,
    ) -> None:
        """Keep the collaborators and create the naming scheme used to render group-by items."""
        self._parameter_set = parameter_set
        self._metric_generator = metric_generator
        self._dimension_generator = categorical_dimension_generator
        self._naming_scheme = ObjectBuilderNamingScheme()

    def generate_saved_queries(self) -> Sequence[PydanticSavedQuery]:
        """Generate `saved_query_count` saved queries.

        Each query lists `metrics_per_saved_query` metrics taken from the deepest metric level and
        `categorical_dimensions_per_saved_query` categorical dimensions linked through the common entity. Both the
        metric and the dimension cursors carry over from one saved query to the next and wrap around.
        """
        metric_generator = self._metric_generator
        dimension_generator = self._dimension_generator
        parameters = self._parameter_set

        metric_cursor = metric_generator.get_first_index_at_max_depth()
        dimension_cursor = 0
        generated_queries = []

        for query_index in range(parameters.saved_query_count):
            metric_names = []
            for _ in range(parameters.metrics_per_saved_query):
                metric_names.append(metric_generator.get_metric_name(metric_cursor))
                metric_cursor = metric_generator.get_next_wrapped_width_index(metric_cursor)

            group_by_items = []
            for _ in range(parameters.categorical_dimensions_per_saved_query):
                dimension_spec = DimensionSpec(
                    element_name=dimension_generator.get_dimension_name(dimension_cursor),
                    entity_links=(EntityReference(parameters.common_entity_name),),
                )
                group_by_items.append(self._naming_scheme.input_str(dimension_spec))
                dimension_cursor = dimension_generator.get_next_wrapped_index(dimension_cursor)

            query_params = PydanticSavedQueryQueryParams(
                metrics=metric_names,
                group_by=group_by_items,
            )
            generated_queries.append(
                PydanticSavedQuery(
                    name=self._get_saved_query_name(query_index),
                    query_params=query_params,
                )
            )

        return generated_queries

    def _get_saved_query_name(self, saved_query_index: int) -> str:
        """Return the saved query name for the index, e.g. `saved_query_003`."""
        return f"saved_query_{saved_query_index:03}"
dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation +from dbt_semantic_interfaces.implementations.project_configuration import PydanticProjectConfiguration +from dbt_semantic_interfaces.implementations.semantic_manifest import PydanticSemanticManifest +from dbt_semantic_interfaces.implementations.semantic_model import PydanticSemanticModel +from dbt_semantic_interfaces.implementations.time_spine import PydanticTimeSpine, PydanticTimeSpinePrimaryColumn +from dbt_semantic_interfaces.type_enums import TimeGranularity + +from tests_metricflow.performance.categorical_dimension_generator import CategoricalDimensionGenerator +from tests_metricflow.performance.dimension_semantic_model_generator import DimensionSemanticModelGenerator +from tests_metricflow.performance.measure_generator import MeasureGenerator +from tests_metricflow.performance.measure_semantic_model_generator import MeasureSemanticModelGenerator +from tests_metricflow.performance.metric_generator import MetricGenerator +from tests_metricflow.performance.saved_query_generator import SavedQueryGenerator +from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet + + +class SyntheticManifestGenerator: + """Generates a synthetic semantic manifest that can be used for performance testing.""" + + def __init__(self, parameter_set: SyntheticManifestParameterSet) -> None: # noqa: D107 + self._parameter_set = parameter_set + self._measure_generator = MeasureGenerator(parameter_set) + self._categorical_dimension_generator = CategoricalDimensionGenerator(parameter_set) + self._measure_semantic_model_generator = MeasureSemanticModelGenerator( + parameter_set=parameter_set, + measure_generator=self._measure_generator, + ) + self._dimension_semantic_model_generator = DimensionSemanticModelGenerator( + parameter_set=parameter_set, + categorical_dimension_generator=self._categorical_dimension_generator, + ) + self._metric_generator = MetricGenerator( 
+ parameter_set=parameter_set, + measure_generator=self._measure_generator, + ) + self._saved_query_generator = SavedQueryGenerator( + parameter_set=parameter_set, + metric_generator=self._metric_generator, + categorical_dimension_generator=self._categorical_dimension_generator, + ) + + def generate_manifest(self) -> PydanticSemanticManifest: + """Generate a manifest using the given parameters.""" + semantic_models: List[PydanticSemanticModel] = [] + + semantic_models.extend(self._measure_semantic_model_generator.generate_semantic_models()) + semantic_models.extend(self._dimension_semantic_model_generator.generate_semantic_models()) + + return PydanticSemanticManifest( + semantic_models=semantic_models, + metrics=self._metric_generator.generate_metrics(), + project_configuration=PydanticProjectConfiguration( + time_spines=[ + PydanticTimeSpine( + node_relation=PydanticNodeRelation( + alias="time_spine_source_table", + schema_name="demo", + ), + primary_column=PydanticTimeSpinePrimaryColumn( + name="ds", + time_granularity=TimeGranularity.DAY, + ), + ) + ] + ), + saved_queries=self._saved_query_generator.generate_saved_queries(), + ) diff --git a/tests_metricflow/performance/synthetic_manifest_parameter_set.py b/tests_metricflow/performance/synthetic_manifest_parameter_set.py new file mode 100644 index 0000000000..e34a7a3917 --- /dev/null +++ b/tests_metricflow/performance/synthetic_manifest_parameter_set.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class SyntheticManifestParameterSet: + """Describes how to generate a synthetic manifest for performance testing. + + Goals are: + * Allow modeling of similar patterns seen in production manifests. + * Make generation straightforward. + * Minimize the number of parameters required. + + Notes: + * The synthetic manifest groups semantic models into two types - ones containing measures, and others containing dimensions. 
+ * A dimension with the same name does not appear in multiple semantic models. + * All semantic models contain a common entity so that any measure can be queried by any dimension. + * The metric `depth` describes the number of hops that are required to get to the simple metric when following the + definition tree. + * Metrics at `depth=0` are simple metrics. Metrics at other depth values are derived. + * Each metric is defined using all possible metrics at a lower depth. + * The number of metrics that are generated with a given `depth` is called the `width`. + * A random seed can be added later. + """ + + # The number of semantic models to generate that contain measures. + measure_semantic_model_count: int + # For each semantic model containing measures, the number of measures that it should contain. + measures_per_semantic_model: int + + # The number of semantic models to generate that contain dimensions. + dimension_semantic_model_count: int + # For each semantic model containing dimensions, the number of categorical dimensions that it should contain. + categorical_dimensions_per_semantic_model: int + + # See class docstring. + max_metric_depth: int + max_metric_width: int + + # The number of saved queries to generate and the number of elements in each. + saved_query_count: int + metrics_per_saved_query: int + categorical_dimensions_per_saved_query: int + + # The name of the entity that is common to semantic models containing measures and the semantic models + # containing dimensions.
+ common_entity_name: str = "common_entity" diff --git a/tests_metricflow/performance/test_manifest_generator.py b/tests_metricflow/performance/test_manifest_generator.py new file mode 100644 index 0000000000..098dd7f9c5 --- /dev/null +++ b/tests_metricflow/performance/test_manifest_generator.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import logging + +from _pytest.fixtures import FixtureRequest +from dbt_semantic_interfaces.implementations.semantic_manifest import PydanticSemanticManifest +from dbt_semantic_interfaces.transformations.semantic_manifest_transformer import PydanticSemanticManifestTransformer +from dbt_semantic_interfaces.validations.semantic_manifest_validator import SemanticManifestValidator +from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat +from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration +from metricflow_semantics.test_helpers.snapshot_helpers import assert_object_snapshot_equal + +from tests_metricflow.performance.semantic_manifest_generator import SyntheticManifestGenerator +from tests_metricflow.performance.synthetic_manifest_parameter_set import SyntheticManifestParameterSet + +logger = logging.getLogger(__name__) + + +def test_manifest_generator( # noqa: D103 + request: FixtureRequest, + mf_test_configuration: MetricFlowTestConfiguration, +) -> None: + parameter_set = SyntheticManifestParameterSet( + measure_semantic_model_count=2, + measures_per_semantic_model=2, + dimension_semantic_model_count=2, + categorical_dimensions_per_semantic_model=2, + max_metric_depth=2, + max_metric_width=2, + saved_query_count=2, + metrics_per_saved_query=2, + categorical_dimensions_per_saved_query=2, + ) + generator = SyntheticManifestGenerator(parameter_set) + manifest = generator.generate_manifest() + assert_object_snapshot_equal( + request=request, + mf_test_configuration=mf_test_configuration, + obj=manifest, + ) + + manifest = 
PydanticSemanticManifestTransformer.transform(manifest) + validator = SemanticManifestValidator[PydanticSemanticManifest]() + validation_result = validator.validate_semantic_manifest(manifest) + logger.debug(LazyFormat("Generated manifest", manifest=manifest)) + + assert not validation_result.has_blocking_issues, str( + LazyFormat( + "Found validation issues with the generated manifest", + validation_result=validation_result, + manifest=manifest, + ) + ) diff --git a/tests_metricflow/snapshots/test_manifest_generator.py/PydanticSemanticManifest/test_manifest_generator__result.txt b/tests_metricflow/snapshots/test_manifest_generator.py/PydanticSemanticManifest/test_manifest_generator__result.txt new file mode 100644 index 0000000000..2f38bd7f66 --- /dev/null +++ b/tests_metricflow/snapshots/test_manifest_generator.py/PydanticSemanticManifest/test_manifest_generator__result.txt @@ -0,0 +1,155 @@ +PydanticSemanticManifest( + semantic_models=[ + PydanticSemanticModel( + name='measure_model_000', + node_relation=PydanticNodeRelation( + alias='measure_model_000', + schema_name='demo', + relation_name='demo.measure_model_000', + ), + entities=[ + PydanticEntity(name='measure_model_000_primary_entity', type=PRIMARY), + PydanticEntity(name='common_entity', type=UNIQUE), + ], + measures=[ + PydanticMeasure(name='measure_000', agg=SUM, agg_time_dimension='ds'), + PydanticMeasure(name='measure_001', agg=SUM, agg_time_dimension='ds'), + ], + dimensions=[ + PydanticDimension( + name='ds', + type=TIME, + is_partition=False, + type_params=PydanticDimensionTypeParams(time_granularity=DAY), + ), + ], + ), + PydanticSemanticModel( + name='measure_model_001', + node_relation=PydanticNodeRelation( + alias='measure_model_001', + schema_name='demo', + relation_name='demo.measure_model_001', + ), + entities=[ + PydanticEntity(name='measure_model_001_primary_entity', type=PRIMARY), + PydanticEntity(name='common_entity', type=UNIQUE), + ], + measures=[ + PydanticMeasure(name='measure_002', 
agg=SUM, agg_time_dimension='ds'), + PydanticMeasure(name='measure_003', agg=SUM, agg_time_dimension='ds'), + ], + dimensions=[ + PydanticDimension( + name='ds', + type=TIME, + is_partition=False, + type_params=PydanticDimensionTypeParams(time_granularity=DAY), + ), + ], + ), + PydanticSemanticModel( + name='dimension_model_000', + node_relation=PydanticNodeRelation( + alias='dimension_model_000', + schema_name='demo', + relation_name='demo.dimension_model_000', + ), + entities=[ + PydanticEntity(name='dimension_model_000_primary_entity', type=PRIMARY), + PydanticEntity(name='common_entity', type=UNIQUE), + ], + dimensions=[ + PydanticDimension(name='dimension_000', type=CATEGORICAL, is_partition=False), + PydanticDimension(name='dimension_001', type=CATEGORICAL, is_partition=False), + ], + ), + PydanticSemanticModel( + name='dimension_model_001', + node_relation=PydanticNodeRelation( + alias='dimension_model_001', + schema_name='demo', + relation_name='demo.dimension_model_001', + ), + entities=[ + PydanticEntity(name='dimension_model_001_primary_entity', type=PRIMARY), + PydanticEntity(name='common_entity', type=UNIQUE), + ], + dimensions=[ + PydanticDimension(name='dimension_002', type=CATEGORICAL, is_partition=False), + PydanticDimension(name='dimension_003', type=CATEGORICAL, is_partition=False), + ], + ), + ], + metrics=[ + PydanticMetric( + name='metric_0_000', + type=SIMPLE, + type_params=PydanticMetricTypeParams( + measure=PydanticMetricInputMeasure(name='measure_000', join_to_timespine=False), + ), + ), + PydanticMetric( + name='metric_0_001', + type=SIMPLE, + type_params=PydanticMetricTypeParams( + measure=PydanticMetricInputMeasure(name='measure_001', join_to_timespine=False), + ), + ), + PydanticMetric( + name='metric_1_000', + type=DERIVED, + type_params=PydanticMetricTypeParams( + expr='metric_0_000 + metric_0_001', + metrics=[PydanticMetricInput(name='metric_0_000'), PydanticMetricInput(name='metric_0_001')], + ), + ), + PydanticMetric( + 
name='metric_1_001', + type=DERIVED, + type_params=PydanticMetricTypeParams( + expr='metric_0_000 + metric_0_001', + metrics=[PydanticMetricInput(name='metric_0_000'), PydanticMetricInput(name='metric_0_001')], + ), + ), + ], + project_configuration=PydanticProjectConfiguration( + dsi_package_version=PydanticSemanticVersion( + major_version='0', + minor_version='7', + patch_version='2', + ), + time_spines=[ + PydanticTimeSpine( + node_relation=PydanticNodeRelation( + alias='time_spine_source_table', + schema_name='demo', + relation_name='demo.time_spine_source_table', + ), + primary_column=PydanticTimeSpinePrimaryColumn(name='ds', time_granularity=DAY), + ), + ], + ), + saved_queries=[ + PydanticSavedQuery( + name='saved_query_000', + query_params=PydanticSavedQueryQueryParams( + metrics=['metric_1_000', 'metric_1_001'], + group_by=[ + "Dimension('common_entity__dimension_000')", + "Dimension('common_entity__dimension_001')", + ], + ), + ), + PydanticSavedQuery( + name='saved_query_001', + query_params=PydanticSavedQueryQueryParams( + metrics=['metric_1_000', 'metric_1_001'], + group_by=[ + "Dimension('common_entity__dimension_002')", + "Dimension('common_entity__dimension_003')", + ], + ), + ), + ], +)