Skip to content

Commit

Permalink
Consolidate property filters into a class (#1441)
Browse files Browse the repository at this point in the history
The signature of a few functions in `ValidLinkableSpecResolver` take in
a few arguments that describe how to filter the returned items based on
properties. To simplify the signatures and enable improved caching in a
later PR, this PR groups them into the `LinkableElementFilter` class.
plypaul authored Oct 2, 2024
1 parent df26a6b commit fab87da
Showing 9 changed files with 121 additions and 86 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import FrozenSet

from typing_extensions import Self, override

from metricflow_semantics.collection_helpers.merger import Mergeable
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty


@dataclass(frozen=True)
class LinkableElementFilter(Mergeable):
"""Describes a way to filter the `LinkableElements` in a `LinkableElementSet`."""

with_any_of: FrozenSet[LinkableElementProperty] = LinkableElementProperty.all_properties()
without_any_of: FrozenSet[LinkableElementProperty] = frozenset()
without_all_of: FrozenSet[LinkableElementProperty] = frozenset()

@override
def merge(self: Self, other: LinkableElementFilter) -> LinkableElementFilter:
return LinkableElementFilter(
with_any_of=self.with_any_of.union(other.with_any_of),
without_any_of=self.without_any_of.union(other.without_any_of),
without_all_of=self.without_all_of.union(other.without_all_of),
)

@classmethod
@override
def empty_instance(cls) -> LinkableElementFilter:
return LinkableElementFilter()
Original file line number Diff line number Diff line change
@@ -5,15 +5,15 @@
from collections import defaultdict
from dataclasses import dataclass, field
from functools import cached_property
from typing import Dict, FrozenSet, List, Sequence, Set, Tuple
from typing import Dict, List, Sequence, Set, Tuple

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.references import SemanticModelReference
from typing_extensions import override

from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element import (
ElementPathKey,
LinkableDimension,
@@ -221,16 +221,18 @@ def linkable_elements_for_path_key(self, path_key: ElementPathKey) -> Sequence[L

def filter(
self,
with_any_of: FrozenSet[LinkableElementProperty],
without_any_of: FrozenSet[LinkableElementProperty] = frozenset(),
without_all_of: FrozenSet[LinkableElementProperty] = frozenset(),
element_filter: LinkableElementFilter,
) -> LinkableElementSet:
"""Filter elements in the set.
First, only elements with at least one property in the "with_any_of" set are retained. Then, any elements with
a property in "without_any_of" set are removed. Lastly, any elements with all properties in without_all_of
are removed.
"""
with_any_of = element_filter.with_any_of
without_any_of = element_filter.without_any_of
without_all_of = element_filter.without_all_of

key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = {}
key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = {}
key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = {}
Original file line number Diff line number Diff line change
@@ -27,6 +27,7 @@
from metricflow_semantics.mf_logging.pretty_print import mf_pformat
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element import (
ElementPathKey,
LinkableDimension,
@@ -105,7 +106,8 @@ def __init__(
if metric.type is MetricType.CUMULATIVE:
linkable_sets_for_measure.append(
self._get_linkable_element_set_for_measure(
measure, without_any_of=frozenset({LinkableElementProperty.DATE_PART})
measure,
LinkableElementFilter(without_any_of=frozenset({LinkableElementProperty.DATE_PART})),
)
)
elif (
@@ -593,16 +595,15 @@ def _get_joined_elements_without_cache(
def _get_linkable_element_set_for_measure(
self,
measure_reference: MeasureReference,
with_any_of: FrozenSet[LinkableElementProperty] = LinkableElementProperty.all_properties(),
without_any_of: FrozenSet[LinkableElementProperty] = frozenset(),
element_filter: LinkableElementFilter = LinkableElementFilter(),
) -> LinkableElementSet:
"""See get_linkable_element_set_for_measure()."""
measure_semantic_model = self._get_semantic_model_for_measure(measure_reference)

elements_in_semantic_model = self._get_elements_in_semantic_model(measure_semantic_model)

# Filter out group-by metrics if not specified by the property as there can be a large number of them.
if LinkableElementProperty.METRIC not in without_any_of:
if LinkableElementProperty.METRIC not in element_filter.without_any_of:
metrics_linked_to_semantic_model = self.get_joinable_metrics_for_semantic_model(
semantic_model=measure_semantic_model,
using_join_path=SemanticModelJoinPath(left_semantic_model_reference=measure_semantic_model.reference),
@@ -620,42 +621,35 @@ def _get_linkable_element_set_for_measure(
metric_time_elements,
joined_elements,
)
).filter(
with_any_of=with_any_of,
without_any_of=without_any_of,
)
).filter(element_filter)

def get_linkable_element_set_for_measure(
self,
measure_reference: MeasureReference,
with_any_of: FrozenSet[LinkableElementProperty],
without_any_of: FrozenSet[LinkableElementProperty],
element_filter: LinkableElementFilter,
) -> LinkableElementSet:
"""Get the valid linkable elements for the given measure."""
return self._get_linkable_element_set_for_measure(
measure_reference=measure_reference,
with_any_of=with_any_of,
without_any_of=without_any_of,
element_filter=element_filter,
)

def get_linkable_elements_for_distinct_values_query(
self,
with_any_of: FrozenSet[LinkableElementProperty],
without_any_of: FrozenSet[LinkableElementProperty],
element_filter: LinkableElementFilter,
) -> LinkableElementSet:
"""Returns queryable items for a distinct group-by-item values query.
A distinct group-by-item values query does not include any metrics.
"""
return self._no_metric_linkable_element_set.filter(with_any_of=with_any_of, without_any_of=without_any_of)
return self._no_metric_linkable_element_set.filter(element_filter)

# TODO: the results of this method don't actually match what will be allowed for the metric. This method checks
# _metric_to_linkable_element_sets, while the actual group by resolution DAG calls _get_linkable_element_set_for_measure.
def get_linkable_elements_for_metrics(
self,
metric_references: Sequence[MetricReference],
with_any_of: FrozenSet[LinkableElementProperty] = LinkableElementProperty.all_properties(),
without_any_of: FrozenSet[LinkableElementProperty] = frozenset(),
element_filter: LinkableElementFilter = LinkableElementFilter(),
) -> LinkableElementSet:
"""Gets the valid linkable elements that are common to all requested metrics."""
linkable_element_sets = []
@@ -667,10 +661,7 @@ def get_linkable_elements_for_metrics(
# Using .only_unique_path_keys to exclude ambiguous elements where there are multiple join paths to get
# a dimension / entity.
metric_result = LinkableElementSet.intersection_by_path_key(
[
element_set.only_unique_path_keys.filter(with_any_of=with_any_of, without_any_of=without_any_of)
for element_set in element_sets
]
[element_set.only_unique_path_keys.filter(element_filter) for element_set in element_sets]
)
linkable_element_sets.append(metric_result)

Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
import functools
import logging
import time
from typing import Dict, FrozenSet, Optional, Sequence, Set
from typing import Dict, Optional, Sequence, Set

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.protocols.metric import Metric, MetricInputMeasure, MetricType
@@ -13,7 +13,7 @@

from metricflow_semantics.errors.error_classes import DuplicateMetricError, MetricNotFoundError, NonExistentMeasureError
from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element_set import LinkableElementSet
from metricflow_semantics.model.semantics.linkable_spec_resolver import (
ValidLinkableSpecResolver,
@@ -66,18 +66,13 @@ def __init__(
def linkable_elements_for_measure(
self,
measure_reference: MeasureReference,
with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
element_filter: LinkableElementFilter = LinkableElementFilter(),
) -> LinkableElementSet:
"""Return the set of linkable elements reachable from a given measure."""
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of
frozen_without_any_of = frozenset() if without_any_of is None else without_any_of

start_time = time.time()
linkable_element_set = self._linkable_spec_resolver.get_linkable_element_set_for_measure(
measure_reference=measure_reference,
with_any_of=frozen_with_any_of,
without_any_of=frozen_without_any_of,
element_filter=element_filter,
)
logger.debug(
LazyFormat(
@@ -89,31 +84,18 @@ def linkable_elements_for_measure(

@functools.lru_cache
def linkable_elements_for_no_metrics_query(
self,
with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
self, element_set_filter: LinkableElementFilter = LinkableElementFilter()
) -> LinkableElementSet:
"""Return the reachable linkable elements for a dimension values query with no metrics."""
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of
frozen_without_any_of = frozenset() if without_any_of is None else without_any_of

return self._linkable_spec_resolver.get_linkable_elements_for_distinct_values_query(
with_any_of=frozen_with_any_of,
without_any_of=frozen_without_any_of,
)
return self._linkable_spec_resolver.get_linkable_elements_for_distinct_values_query(element_set_filter)

@functools.lru_cache
def linkable_elements_for_metrics(
self,
metric_references: Sequence[MetricReference],
with_any_property: FrozenSet[LinkableElementProperty] = LinkableElementProperty.all_properties(),
without_any_property: FrozenSet[LinkableElementProperty] = frozenset(),
self, metric_references: Sequence[MetricReference], element_set_filter: LinkableElementFilter
) -> LinkableElementSet:
"""Retrieve the matching set of linkable elements common to all metrics requested (intersection)."""
return self._linkable_spec_resolver.get_linkable_elements_for_metrics(
metric_references=metric_references,
with_any_of=with_any_property,
without_any_of=without_any_property,
metric_references=metric_references, element_filter=element_set_filter
)

def get_metrics(self, metric_references: Sequence[MetricReference]) -> Sequence[Metric]: # noqa: D102
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@
from metricflow_semantics.mf_logging.pretty_print import mf_pformat, mf_pformat_many
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.query.group_by_item.candidate_push_down.group_by_item_candidate import GroupByItemCandidateSet
from metricflow_semantics.query.group_by_item.filter_spec_resolution.filter_location import (
WhereFilterLocation,
@@ -187,8 +188,10 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu

items_available_for_measure = self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_measure(
measure_reference=node.measure_reference,
with_any_of=self._with_any_property,
without_any_of=frozenset(without_any_property),
element_filter=LinkableElementFilter(
with_any_of=self._with_any_property or LinkableElementProperty.all_properties(),
without_any_of=frozenset(without_any_property),
),
)

# The following is needed to handle limitation of cumulative metrics. Filtering could be done at the measure
Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@
)
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element import (
ElementPathKey,
LinkableDimension,
@@ -265,7 +266,7 @@ def test_filter_with_any_of() -> None:
filter_properties = frozenset([LinkableElementProperty.JOINED, LinkableElementProperty.ENTITY])
linkable_set = _linkable_set_with_uniques_and_duplicates()

filtered_set = linkable_set.filter(with_any_of=filter_properties)
filtered_set = linkable_set.filter(LinkableElementFilter(with_any_of=filter_properties))

filtered_dimensions = [
dim for dim in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_dimensions.values())
@@ -304,7 +305,9 @@ def test_filter_without_any_of() -> None:
without_any_of_properties = frozenset([LinkableElementProperty.ENTITY, LinkableElementProperty.METRIC])
linkable_set = _linkable_set_with_uniques_and_duplicates()

filtered_set = linkable_set.filter(with_any_of=with_any_of_properties, without_any_of=without_any_of_properties)
filtered_set = linkable_set.filter(
LinkableElementFilter(with_any_of=with_any_of_properties, without_any_of=without_any_of_properties)
)

filtered_dimensions = [
dim for dim in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_dimensions.values())
@@ -340,7 +343,9 @@ def test_filter_without_all_of() -> None:
without_all_of_properties = frozenset([LinkableElementProperty.JOINED, LinkableElementProperty.ENTITY])
linkable_set = _linkable_set_with_uniques_and_duplicates()

filtered_set = linkable_set.filter(with_any_of=with_any_of_properties, without_all_of=without_all_of_properties)
filtered_set = linkable_set.filter(
LinkableElementFilter(with_any_of=with_any_of_properties, without_all_of=without_all_of_properties)
)

filtered_metrics = [
metric for metric in itertools.chain.from_iterable(filtered_set.path_key_to_linkable_metrics.values())
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
)
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow_semantics.model.semantics.element_filter import LinkableElementFilter
from metricflow_semantics.model.semantics.linkable_element import SemanticModelJoinPath, SemanticModelJoinPathElement
from metricflow_semantics.model.semantics.linkable_spec_resolver import ValidLinkableSpecResolver
from metricflow_semantics.model.semantics.semantic_model_join_evaluator import MAX_JOIN_HOPS
@@ -58,8 +59,9 @@ def test_all_properties( # noqa: D103
set_id="result0",
linkable_element_set=simple_model_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="bookings"), MetricReference(element_name="views")],
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset({}),
element_filter=LinkableElementFilter(
with_any_of=LinkableElementProperty.all_properties(), without_any_of=frozenset()
),
),
)

@@ -75,8 +77,9 @@ def test_one_property( # noqa: D103
set_id="result0",
linkable_element_set=simple_model_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="bookings"), MetricReference(element_name="views")],
with_any_of=frozenset({LinkableElementProperty.LOCAL}),
without_any_of=frozenset(),
element_filter=LinkableElementFilter(
with_any_of=frozenset({LinkableElementProperty.LOCAL}), without_any_of=frozenset()
),
),
)

@@ -92,8 +95,9 @@ def test_metric_time_property_for_cumulative_metric( # noqa: D103
set_id="result0",
linkable_element_set=simple_model_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="trailing_2_months_revenue")],
with_any_of=frozenset({LinkableElementProperty.METRIC_TIME}),
without_any_of=frozenset(),
element_filter=LinkableElementFilter(
with_any_of=frozenset({LinkableElementProperty.METRIC_TIME}), without_any_of=frozenset()
),
),
)

@@ -109,8 +113,9 @@ def test_metric_time_property_for_derived_metrics( # noqa: D103
set_id="result0",
linkable_element_set=simple_model_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="bookings_per_view")],
with_any_of=frozenset({LinkableElementProperty.METRIC_TIME}),
without_any_of=frozenset(),
element_filter=LinkableElementFilter(
with_any_of=frozenset({LinkableElementProperty.METRIC_TIME}), without_any_of=frozenset()
),
),
)

@@ -126,8 +131,10 @@ def test_cyclic_join_manifest( # noqa: D103
set_id="result0",
linkable_element_set=cyclic_join_manifest_spec_resolver.get_linkable_elements_for_metrics(
metric_references=[MetricReference(element_name="listings")],
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset(),
element_filter=LinkableElementFilter(
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset(),
),
),
)

@@ -192,8 +199,10 @@ def test_linkable_element_set_as_spec_set(
linkable_spec_set = InstanceSpecSet.create_from_specs(
simple_model_spec_resolver.get_linkable_element_set_for_measure(
MeasureReference(element_name="listings"),
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset({}),
element_filter=LinkableElementFilter(
with_any_of=LinkableElementProperty.all_properties(),
without_any_of=frozenset(),
),
).specs
)
assert_spec_set_snapshot_equal(
Loading

0 comments on commit fab87da

Please sign in to comment.