From cb03029298835c1b3b11fbd4d2a2e4c34b06dd5b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 1 Oct 2024 20:10:03 +0200 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Replace=20need=20dicts/lis?= =?UTF-8?q?ts=20with=20views=20(with=20fast=20filtering)=20(#1281)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit addresses issues in performance scalability of needs filtering: - In a known user project, with ~40,000 needs, there are 5316 individual filter calls, across directives such as `needarch`, `needtable`, `needpie`, etc. Each takes on average 0.33 seconds, cumulative totalling **1751 seconds** - Applying this PR reduces the average time to 0.033 seconds, cumulatively totalling **181 seconds** ---- The main issue with filtering (a.k.a. querying) needs, is that it requires looping over every need and executing a Python evaluation of the filter string (e.g. `id == "xxx"`), which scales with `O(N)` time. To reduce this to `O(1)` time, for common patterns, we look to do two things: 1. More tightly control access to needs data across its lifecycle (see #1264): By making needs data strictly immutable/read-only during the write/analysis phase (after it has been fully collected and post-processed), and moving access behind an abstract interface (as opposed to a standard dictionary), we can perform indexing on standard need fields, making value lookups `O(1)` 2. Analysing the filter string for common patterns (by parsing/analysing the Python AST), we can then utilise *index lookups* rather than *row scans* in these cases (akin to [SQL query plans](https://sqlite.org/eqp.html)). --- One complication in creating the indexes and abstract interfaces, is that within the code base there are essentially two ways of accessing needs data: - As a mapping of need ID to need data (now `NeedsView`) - As a list of both needs and "expanded" parts (now `NeedsAndPartsListView`) Particularly when it comes to e.g. `id == "xxx"`, this then has different meanings, as the former is only filtering for need IDs and the latter is filtering for both Need IDs and part IDs --- It is of note that this will be breaking for any projects that currently attempts to mutate needs data within the write phase. This has been mitigated by the addition of two sphinx events, which give more *precise* control for this use case: - `needs-before-post-processing`: callbacks `func(app, needs)` are called just before the needs are post-processed (e.g. processing dynamic functions and back links) - `needs-before-sealing`: callbacks `func(app, needs)` just after post-processing, before the needs are changed to read-only Additionally, `env.needs_all_needs` has been replaced with `env._needs_all_needs`, since this "raw" data-structure should not be accessed directly. The `get_needs_view` function has been added to the `sphinx_needs.api` to mitigate this, and it should perhaps be made clearer that users should NOT be accessing any `sphinx_needs` API outside this module. --- docs/api.rst | 23 +- docs/conf.py | 4 +- docs/filter.rst | 85 ++-- docs/tutorial.rst | 12 +- sphinx_needs/api/__init__.py | 3 +- sphinx_needs/api/need.py | 13 + sphinx_needs/builder.py | 9 +- sphinx_needs/data.py | 58 ++- sphinx_needs/directives/need.py | 10 +- sphinx_needs/directives/needbar.py | 8 +- sphinx_needs/directives/needflow/_graphviz.py | 3 +- sphinx_needs/directives/needflow/_plantuml.py | 8 +- sphinx_needs/directives/needflow/_shared.py | 28 +- sphinx_needs/directives/needpie.py | 4 +- sphinx_needs/directives/needsequence.py | 8 +- sphinx_needs/external_needs.py | 2 +- sphinx_needs/filter_common.py | 275 ++++++++---- sphinx_needs/functions/common.py | 28 +- sphinx_needs/functions/functions.py | 26 +- sphinx_needs/needs.py | 5 +- sphinx_needs/roles/need_count.py | 9 +- sphinx_needs/roles/need_func.py | 10 +- sphinx_needs/roles/need_part.py | 20 +- sphinx_needs/utils.py | 5 +- sphinx_needs/views.py | 402 ++++++++++++++++++ sphinx_needs/warnings.py | 6 +- tests/test_filter.py | 173 ++++++++ tests/test_needimport.py | 2 +- tests/test_needuml.py | 2 +- 29 files changed, 976 insertions(+), 265 deletions(-) create mode 100644 sphinx_needs/views.py diff --git a/docs/api.rst b/docs/api.rst index f71acb4d7..75ace83c9 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -6,17 +6,15 @@ Python API **Sphinx-Needs** provides an open API for other Sphinx-extensions to provide specific need-types, create needs or make usage of the filter possibilities. -The API allows the injection of extra configuration into it. The API does not support the overall manipulation (e.g remove need types) +The API allows the injection of extra configuration, but +does not support manipulation of it (e.g remove need types), to keep the final configuration transparent for the Sphinx project authors. -For some implementation ideas, take a look into the Sphinx extension -`Sphinx-Test-Reports `_ and its -`source code `_. - .. _api_configuration: Configuration ------------- + .. automodule:: sphinx_needs.api.configuration :members: @@ -25,12 +23,14 @@ Configuration Need ---- + .. automodule:: sphinx_needs.api.need :members: Exceptions ---------- + .. automodule:: sphinx_needs.api.exceptions :members: @@ -38,4 +38,15 @@ Data ---- .. automodule:: sphinx_needs.data - :members: NeedsInfoType, NeedsView, NeedsPartsView + :members: NeedsInfoType, NeedsMutable + +Views +----- + +These views are returned by certain functions, and injected into filters, +but should not be instantiated directly. + +.. automodule:: sphinx_needs.views + :members: + :undoc-members: + :special-members: __iter__, __getitem__, __len__ diff --git a/docs/conf.py b/docs/conf.py index 2913a542b..11fdac7bb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,8 +47,8 @@ ("py:class", "docutils.nodes.Node"), ("py:class", "docutils.parsers.rst.states.RSTState"), ("py:class", "docutils.statemachine.StringList"), - ("py:class", "T"), ("py:class", "sphinx_needs.debug.T"), + ("py:class", "sphinx_needs.views._LazyIndexes"), ] rst_epilog = """ @@ -756,7 +756,7 @@ def create_tutorial_needs(app: Sphinx, _env, _docnames): We do this dynamically, to avoid having to maintain the JSON file manually. """ - all_data = SphinxNeedsData(app.env).get_needs_view() + all_data = SphinxNeedsData(app.env).get_needs_mutable() writer = NeedsList(app.config, outdir=app.confdir, confdir=app.confdir) for i in range(1, 5): test_id = f"T_00{i}" diff --git a/docs/filter.rst b/docs/filter.rst index 8f4fc004e..b142950dc 100644 --- a/docs/filter.rst +++ b/docs/filter.rst @@ -3,10 +3,8 @@ Filtering needs =============== -**Sphinx-Needs** supports the filtering of need and need_parts by using easy to use options or powerful filter string. - -Available options are specific to the used directive, whereas the filter string is supported by all directives and -roles, which provide filter capabilities. +The filtering of needs and need parts is supported consistently across numerous directives and roles, +either by using filter options or by using a filter string. .. _filter_options: @@ -19,10 +17,8 @@ The following filter options are supported by directives: * :ref:`needtable` * :ref:`needflow` * :ref:`needpie` - * ``needfilter`` (deprecated!) * :ref:`needextend` - Related to the used directive and its representation, the filter options create a list of needs, which match the filters for status, tags, types and filter. @@ -115,6 +111,7 @@ The usage of a filter string is supported/required by: * :ref:`needflow` * :ref:`needpie` * :ref:`needbar` +* :ref:`needuml` / :ref:`needarch` The filter string must be a valid Python expression: @@ -125,11 +122,11 @@ The filter string must be a valid Python expression: A filter string gets evaluated on needs and need_parts! A need_part inherits all options from its parent need, if the need_part has no own content for this option. -E.g. the need_part *title* is kept, but the *status* attribute is taken from its parent need. +E.g. the need_part *content* is kept, but the *status* attribute is taken from its parent need. .. note:: - Following attributes are kept inside a need_part: id, title, links_back + The following attributes are kept inside a need_part: id, title, links_back This allows to perform searches for need_parts, where search options are based on parent attributes. @@ -139,29 +136,7 @@ The following filter will find all need_parts, which are part of a need, which h :need_count:`is_part and 'car' in tags` -Inside a filter string all the fields of :py:class:`.NeedsInfoType` can be used, including: - -* **tags** as Python list (compare like ``"B" in tags``) -* **type** as Python string (compare like ``"story" == type``) -* **status** as Python string (compare like ``"opened" != status``) -* **sections** as Python list with the hierarchy of sections with lowest-level - section first. (compare like ``"Section Header" in sections``) -* **id** as Python string (compare like ``"MY_ID_" in id``) -* **title** as Python string (compare like ``len(title.split(" ")) > 5``) -* **links** as Python list (compare like ``"ID_123" not in links``) -* **links_back** as Python list (compare like ``"ID_123" not in links_back``) -* **content** as Python string (compare like ``len(content) == 0``) -* **is_need** as Python boolean. (compare like ``is_need``) -* **is_part** as Python boolean. (compare like ``is_part``) -* **parts** as Python list with :ref:`need_part` of the current need. (compare like ``len(parts)>0``) -* **sections** as list of sections names, th which the need belongs to. -* **section_name** as string, which defines the last/lowest section a need belongs to. -* **docname** as string, which defines the name of the document in which a need is defined, without the extension (similar to Sphinx' ``:doc:`` role) -* **signature** as string, which contains a function-name, possible set by - `sphinx-autodoc `_ above the need. -* **parent_need** as string, which is an id of the need, which has the current need defined in its content - (added 0.6.2). -* **parent_needs** as string, which is a list of need ids (added 0.6.2). +Inside a filter string all the fields of :py:class:`.NeedsInfoType` can be used, including. Additional variables for :ref:`need_part`: @@ -172,11 +147,10 @@ Additional variables for :ref:`need_part`: .. note:: If extra options were specified using :ref:`needs_extra_options` then those will be available for use in filter expressions as well. - Finally, the following are available: * :ref:`re_search`, as Python function for performing searches with a regular expression -* **needs** as :class:`.NeedsPartsView` object, which contains all needs and need_parts. +* **needs** as :class:`.NeedsAndPartsListView` object, which contains all needs and need_parts. If your expression is valid and it's True, the related need is added to the filter result list. If it is invalid or returns False, the related need is not taken into account for the current filter. @@ -214,6 +188,30 @@ If it is invalid or returns False, the related need is not taken into account fo .. needfilter:: :filter: "filter_example" in tags and (("B" in tags or ("spec" == type and "closed" == status)) or "test" == type) +.. _filter_string_performance: + +Filter string performance +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 4.0.0 + +The filter string is evaluated by default for each need and need part +and, therefore, can be become a performance bottleneck for projects with large numbers of needs. + +To improve performance, certain common patterns are identified and optimized by the filter engine, and so using such patterns is recommended: + +- ``is_external`` / ``is_external == True`` / ``is_external == False`` +- ``id == 'value'`` / ``id == "value"`` / ``'value' == id`` / ``"value" == id`` +- ``id in ['value1', 'value2', ...]`` / ``id in ("value1", "value2", ...)`` +- ``type == 'value'`` / ``type == "value"`` / ``'value' == type`` / ``"value" == type`` +- ``type in ['value1', 'value2', ...]`` / ``type in ("value1", "value2", ...)`` +- ``status == 'value'`` / ``status == "value"`` / ``'value' == status`` / ``"value" == status`` +- ``status in ['value1', 'value2', ...]`` / ``status in ("value1", "value2", ...)`` +- ``'value' in tags`` / ``"value" in tags`` + +Also filters containing ``and`` will be split into multiple filters and evaluated separately for the above patterns. +For example, ``type == 'spec' and other == 'value'`` will first be filtered performantly by ``type == 'spec'`` and then the remaining needs will be filtered by ``other == 'value'``. + .. _re_search: search @@ -265,6 +263,8 @@ with the help of Python. The used code must define a variable ``results``, which must be a list and contains the filtered needs. +The code also has access to a variable called ``needs``, which is a :class:`.NeedsAndPartsListView` instance. + .. need-example:: .. needtable:: @@ -275,18 +275,13 @@ The used code must define a variable ``results``, which must be a list and conta # which are linked to each other. results = [] - # Lets create a needs_dict to address needs by ids more easily. - needs_dict = {x['id']: x for x in needs} - - for need in needs: - if need['type'] == 'req': - for links_id in need['links']: - if needs_dict[links_id]['type'] == 'spec': - results.append(need) - results.append(needs_dict[links_id]) - -The code has access to a variable called ``needs``, which contains a copy of all needs. -So manipulations on the values in ``needs`` do not have any affects. + + for need in needs.filter_types(["req"]): + for links_id in need['links']: + linked_need = needs.get_need(links_id) + if linked_need and linked_need['type'] == 'spec': + results.append(need) + results.append(linked_need) This mechanism can also be a good alternative for complex filter strings to save performance. For example if a filter string is using list comprehensions to get access to linked needs. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 075fd849d..a5a7ac35e 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -11,7 +11,6 @@ We will create need items, link them together, visualize the relationships betwe :root_id: T_CAR :config: tutorial :show_link_names: - :show_filters: :border_color: [status == 'open']:FF0000, [status == 'in progress']:0000FF, @@ -22,6 +21,17 @@ We will create need items, link them together, visualize the relationships betwe This tutorial assumes that you have already :ref:`installed sphinx-needs `, and that you have a basic understanding of how to use :external+sphinx:doc:`Sphinx ` and :external+sphinx:ref:`reStructuredText `. +Need Lifecycle +-------------- + +Within a sphinx build, a primary role of sphinx-needs is to manage the lifecycle of need items: + +1. **Collect**: During the read phase, need items are collected from the source files and configured external sources. +2. **Resolve**: After the read phase, the need items are post-processed to resolve dynamic fields and links, etc, then frozen. +3. **Analyse**: During the write phase, various directives/roles are available to reference, query, and output analysis of the needs. +4. **Render**: During the write phase, the need items are rendered into the output format, such as HTML or PDF. +5. **Validate**: During the final phase, the need items can be validated against configured checks. + Creating need items ------------------- diff --git a/sphinx_needs/api/__init__.py b/sphinx_needs/api/__init__.py index 6021aedec..1601cf1d6 100644 --- a/sphinx_needs/api/__init__.py +++ b/sphinx_needs/api/__init__.py @@ -4,7 +4,7 @@ add_need_type, get_need_types, ) -from .need import add_external_need, add_need, del_need, make_hashed_id +from .need import add_external_need, add_need, del_need, get_needs_view, make_hashed_id __all__ = ( "add_dynamic_function", @@ -14,5 +14,6 @@ "add_need_type", "del_need", "get_need_types", + "get_needs_view", "make_hashed_id", ) diff --git a/sphinx_needs/api/need.py b/sphinx_needs/api/need.py index b734c4cf5..4b8217a0e 100644 --- a/sphinx_needs/api/need.py +++ b/sphinx_needs/api/need.py @@ -31,6 +31,7 @@ from sphinx_needs.nodes import Need from sphinx_needs.roles.need_part import find_parts, update_need_with_parts from sphinx_needs.utils import jinja_parse +from sphinx_needs.views import NeedsView logger = get_logger(__name__) @@ -805,3 +806,15 @@ def _merge_global_options( # has at least the key. if key not in needs_info.keys(): needs_info[key] = "" + + +def get_needs_view(app: Sphinx) -> NeedsView: + """Return a read-only view of all resolved needs. + + .. important:: this should only be called within the write phase, + after the needs have been fully collected. + If not already done, this will ensure all needs are resolved + (e.g. back links have been computed etc), + and then lock the data to prevent further modification. + """ + return SphinxNeedsData(app.env).get_needs_view() diff --git a/sphinx_needs/builder.py b/sphinx_needs/builder.py index ad026c130..9acbd179b 100644 --- a/sphinx_needs/builder.py +++ b/sphinx_needs/builder.py @@ -9,8 +9,6 @@ from sphinx_needs.config import NeedsSphinxConfig from sphinx_needs.data import SphinxNeedsData -from sphinx_needs.directives.need import post_process_needs_data -from sphinx_needs.filter_common import filter_needs_view from sphinx_needs.logging import get_logger, log_warning from sphinx_needs.needsfile import NeedsList @@ -58,9 +56,10 @@ def write( return super().write(build_docnames, updated_docnames, method) def finish(self) -> None: - post_process_needs_data(self.app) + from sphinx_needs.filter_common import filter_needs_view data = SphinxNeedsData(self.env) + needs = data.get_needs_view() needs_config = NeedsSphinxConfig(self.env.config) filters = data.get_or_create_filters() version = getattr(self.env.config, "version", "unset") @@ -84,7 +83,7 @@ def finish(self) -> None: filter_string = needs_config.builder_filter filtered_needs = filter_needs_view( - data.get_needs_view(), + needs, needs_config, filter_string, append_warning="(from need_builder_filter)", @@ -173,7 +172,7 @@ def write( pass def finish(self) -> None: - post_process_needs_data(self.app) + from sphinx_needs.filter_common import filter_needs_view data = SphinxNeedsData(self.env) version = getattr(self.env.config, "version", "unset") diff --git a/sphinx_needs/data.py b/sphinx_needs/data.py index a61eb1430..2d0704bc1 100644 --- a/sphinx_needs/data.py +++ b/sphinx_needs/data.py @@ -12,13 +12,13 @@ Literal, Mapping, NewType, - Sequence, TypedDict, ) from sphinx.util.logging import getLogger from sphinx_needs.logging import log_warning +from sphinx_needs.views import NeedsView if TYPE_CHECKING: from docutils.nodes import Text @@ -62,6 +62,8 @@ class NeedsPartType(TypedDict): links_back: list[str] """List of need IDs, which are referencing this part.""" + # note back links for each type are also set dynamically in post_process_needs_data (-> create_back_links) + class CoreFieldParameters(TypedDict): """Parameters for core fields.""" @@ -690,19 +692,6 @@ class NeedsUmlType(NeedsBaseDataType): """A mutable view of the needs, before resolution """ -NeedsView = NewType("NeedsView", Mapping[str, NeedsInfoType]) -"""A read-only view of the needs, after resolution -(e.g. back links have been computed etc) -""" - -NeedsPartsView = NewType("NeedsPartsView", Sequence[NeedsInfoType]) -"""A read-only view of a sequence of needs and parts, -after resolution (e.g. back links have been computed etc) - -The parts are created by creating a copy of the need for each item in ``parts``, -and then overwriting a subset of fields with the values from the part. -""" - class SphinxNeedsData: """Centralised access to sphinx-needs data, stored within the Sphinx environment.""" @@ -713,10 +702,10 @@ def __init__(self, env: BuildEnvironment) -> None: @property def _env_needs(self) -> dict[str, NeedsInfoType]: try: - return self.env.needs_all_needs + return self.env._needs_all_needs except AttributeError: - self.env.needs_all_needs = {} - return self.env.needs_all_needs + self.env._needs_all_needs = {} + return self.env._needs_all_needs def has_need(self, need_id: str) -> bool: """Check if a need with the given ID exists.""" @@ -730,6 +719,8 @@ def add_need(self, need: NeedsInfoType) -> None: .. important:: this should only be called within the read phase, before the needs have been fully collected and resolved. """ + if self.needs_is_post_processed: + raise RuntimeError("Needs have already been post-processed and frozen.") self._env_needs[need["id"]] = need def remove_need(self, need_id: str) -> None: @@ -738,6 +729,8 @@ def remove_need(self, need_id: str) -> None: .. important:: this should only be called within the read phase, before the needs have been fully collected and resolved. """ + if self.needs_is_post_processed: + raise RuntimeError("Needs have already been post-processed and frozen.") if need_id in self._env_needs: del self._env_needs[need_id] self.remove_need_node(need_id) @@ -748,6 +741,8 @@ def remove_doc(self, docname: str) -> None: .. important:: this should only be called within the read phase, before the needs have been fully collected and resolved. """ + if self.needs_is_post_processed: + raise RuntimeError("Needs have already been post-processed and frozen.") for need_id in list(self._env_needs): if self._env_needs[need_id]["docname"] == docname: del self._env_needs[need_id] @@ -762,16 +757,37 @@ def get_needs_mutable(self) -> NeedsMutable: .. important:: this should only be called within the read phase, before the needs have been fully collected and resolved. """ + if self.needs_is_post_processed: + raise RuntimeError("Needs have already been post-processed and frozen.") return self._env_needs # type: ignore[return-value] def get_needs_view(self) -> NeedsView: - """Return a read-only view of all needs, after resolution. + """Return a read-only view of all resolved needs. .. important:: this should only be called within the write phase, - after the needs have been fully collected - and resolved (e.g. back links have been computed etc) + after the needs have been fully collected. + If not already done, this will ensure all needs are resolved + (e.g. back links have been computed etc), + and then lock the data to prevent further modification. """ - return self._env_needs # type: ignore[return-value] + if not self.needs_is_post_processed: + from sphinx_needs.directives.need import post_process_needs_data + + # TODO the following code may be good to make access stricter, however, + # it fails on rebuilds, where e.g. `build-finished` events can be called without the phase having been updated + # from sphinx.util.build_phase import BuildPhase + # if self.env.app.phase in (BuildPhase.INITIALIZATION, BuildPhase.READING): + # raise RuntimeError( + # "Trying to retrieve needs view incorrectly in init/read phase." + # ) + + post_process_needs_data(self.env.app) + + try: + return self.env._needs_view + except AttributeError: + self.env._needs_view = NeedsView._from_needs(self._env_needs) + return self.env._needs_view @property def has_export_filters(self) -> bool: diff --git a/sphinx_needs/directives/need.py b/sphinx_needs/directives/need.py index 6ad096a57..ede94e61b 100644 --- a/sphinx_needs/directives/need.py +++ b/sphinx_needs/directives/need.py @@ -373,16 +373,18 @@ def post_process_needs_data(app: Sphinx) -> None: After this function has been run, one should assume that the needs data is finalised, and so in principle should be treated as read-only. """ - needs_config = NeedsSphinxConfig(app.config) needs_data = SphinxNeedsData(app.env) - needs = needs_data.get_needs_mutable() - if needs and not needs_data.needs_is_post_processed: + if not needs_data.needs_is_post_processed: + needs_config = NeedsSphinxConfig(app.config) + needs = needs_data.get_needs_mutable() + app.emit("needs-before-post-processing", needs) extend_needs_data(needs, needs_data.get_or_create_extends(), needs_config) resolve_dynamic_values(needs, app) resolve_variants_options(needs, needs_config, app.builder.tags) check_links(needs, needs_config) create_back_links(needs, needs_config) process_constraints(needs, needs_config) + app.emit("needs-before-sealing", needs) needs_data.needs_is_post_processed = True @@ -404,8 +406,6 @@ def process_need_nodes(app: Sphinx, doctree: nodes.document, fromdocname: str) - if not needs_data.get_needs_view(): return - post_process_needs_data(app) - for extend_node in list(doctree.findall(Needextend)): remove_node_from_tree(extend_node) diff --git a/sphinx_needs/directives/needbar.py b/sphinx_needs/directives/needbar.py index d8c371506..eccd4e0d3 100644 --- a/sphinx_needs/directives/needbar.py +++ b/sphinx_needs/directives/needbar.py @@ -10,11 +10,7 @@ from sphinx_needs.config import NeedsSphinxConfig from sphinx_needs.data import NeedsBarType, SphinxNeedsData -from sphinx_needs.filter_common import ( - FilterBase, - expand_needs_view, - filter_needs_parts, -) +from sphinx_needs.filter_common import FilterBase, filter_needs_parts from sphinx_needs.logging import get_logger, log_warning from sphinx_needs.utils import ( add_doc, @@ -296,7 +292,7 @@ def process_needbar( # 5. process content local_data_number = [] # adds parts to need_list - need_list = expand_needs_view(needs_data.get_needs_view()) + need_list = needs_data.get_needs_view().to_list_with_parts() for line in local_data: line_number = [] diff --git a/sphinx_needs/directives/needflow/_graphviz.py b/sphinx_needs/directives/needflow/_graphviz.py index 080c43171..aa2572921 100644 --- a/sphinx_needs/directives/needflow/_graphviz.py +++ b/sphinx_needs/directives/needflow/_graphviz.py @@ -15,7 +15,7 @@ from sphinx.util.logging import getLogger from sphinx_needs.config import LinkOptionsType, NeedsSphinxConfig -from sphinx_needs.data import NeedsInfoType, NeedsView, SphinxNeedsData +from sphinx_needs.data import NeedsInfoType, SphinxNeedsData from sphinx_needs.debug import measure_time from sphinx_needs.diagrams_common import calculate_link from sphinx_needs.directives.needflow._directive import NeedflowGraphiz @@ -29,6 +29,7 @@ match_variants, remove_node_from_tree, ) +from sphinx_needs.views import NeedsView from ._shared import create_filter_paragraph, filter_by_tree, get_root_needs diff --git a/sphinx_needs/directives/needflow/_plantuml.py b/sphinx_needs/directives/needflow/_plantuml.py index 0df5f1f57..9d10b137b 100644 --- a/sphinx_needs/directives/needflow/_plantuml.py +++ b/sphinx_needs/directives/needflow/_plantuml.py @@ -12,12 +12,7 @@ ) from sphinx_needs.config import LinkOptionsType, NeedsSphinxConfig -from sphinx_needs.data import ( - NeedsFlowType, - NeedsInfoType, - NeedsView, - SphinxNeedsData, -) +from sphinx_needs.data import NeedsFlowType, NeedsInfoType, SphinxNeedsData from sphinx_needs.debug import measure_time from sphinx_needs.diagrams_common import calculate_link, create_legend from sphinx_needs.directives.needflow._directive import NeedflowPlantuml @@ -28,6 +23,7 @@ match_variants, remove_node_from_tree, ) +from sphinx_needs.views import NeedsView from ._shared import create_filter_paragraph, filter_by_tree, get_root_needs diff --git a/sphinx_needs/directives/needflow/_shared.py b/sphinx_needs/directives/needflow/_shared.py index 94ef39c4d..a607cd230 100644 --- a/sphinx_needs/directives/needflow/_shared.py +++ b/sphinx_needs/directives/needflow/_shared.py @@ -5,18 +5,15 @@ from docutils import nodes from sphinx_needs.config import LinkOptionsType -from sphinx_needs.data import ( - NeedsFlowType, - NeedsInfoType, - NeedsView, -) +from sphinx_needs.data import NeedsFlowType, NeedsInfoType from sphinx_needs.logging import get_logger +from sphinx_needs.views import NeedsView logger = get_logger(__name__) def filter_by_tree( - all_needs: NeedsView, + needs_view: NeedsView, root_id: str, link_types: list[LinkOptionsType], direction: Literal["both", "incoming", "outgoing"], @@ -24,9 +21,9 @@ def filter_by_tree( ) -> NeedsView: """Filter all needs by the given ``root_id``, and all needs that are connected to the root need by the given ``link_types``, in the given ``direction``.""" - if root_id not in all_needs: - return NeedsView({}) - roots = {root_id: (0, all_needs[root_id])} + if root_id not in needs_view: + return needs_view.filter_ids([]) + roots = {root_id: (0, needs_view[root_id])} link_prefixes = ( ("_back",) if direction == "incoming" @@ -37,24 +34,25 @@ def filter_by_tree( links_to_process = [ link["option"] + d for link in link_types for d in link_prefixes ] - need_items: dict[str, NeedsInfoType] = {} + + need_ids: list[str] = [] while roots: root_id, (root_depth, root) = roots.popitem() - if root_id in need_items: + if root_id in need_ids: continue if depth is not None and root_depth > depth: continue - need_items[root_id] = root + need_ids.append(root_id) for link_type_name in links_to_process: roots.update( { - i: (root_depth + 1, all_needs[i]) + i: (root_depth + 1, needs_view[i]) for i in root.get(link_type_name, []) # type: ignore[attr-defined] - if i in all_needs + if i in needs_view } ) - return NeedsView(need_items) + return needs_view.filter_ids(need_ids) def get_root_needs(found_needs: list[NeedsInfoType]) -> list[NeedsInfoType]: diff --git a/sphinx_needs/directives/needpie.py b/sphinx_needs/directives/needpie.py index 3a47e571e..1244820bf 100644 --- a/sphinx_needs/directives/needpie.py +++ b/sphinx_needs/directives/needpie.py @@ -12,7 +12,7 @@ from sphinx_needs.data import NeedsPieType, SphinxNeedsData from sphinx_needs.debug import measure_time from sphinx_needs.directives.utils import no_needs_found_paragraph -from sphinx_needs.filter_common import FilterBase, expand_needs_view, filter_needs_parts +from sphinx_needs.filter_common import FilterBase, filter_needs_parts from sphinx_needs.logging import get_logger, log_warning from sphinx_needs.utils import ( add_doc, @@ -158,7 +158,7 @@ def process_needpie( sizes = [] # adds parts to need_list - need_list = expand_needs_view(needs_data.get_needs_view()) + need_list = needs_data.get_needs_view().to_list_with_parts() if content and not current_needpie["filter_func"]: for line in content: if line.isdigit(): diff --git a/sphinx_needs/directives/needsequence.py b/sphinx_needs/directives/needsequence.py index 05a23c91b..4e2390293 100644 --- a/sphinx_needs/directives/needsequence.py +++ b/sphinx_needs/directives/needsequence.py @@ -12,12 +12,7 @@ ) from sphinx_needs.config import NeedsSphinxConfig -from sphinx_needs.data import ( - NeedsInfoType, - NeedsSequenceType, - NeedsView, - SphinxNeedsData, -) +from sphinx_needs.data import NeedsInfoType, NeedsSequenceType, SphinxNeedsData from sphinx_needs.diagrams_common import ( DiagramBase, add_config, @@ -30,6 +25,7 @@ from sphinx_needs.filter_common import FilterBase from sphinx_needs.logging import get_logger, log_warning from sphinx_needs.utils import add_doc, remove_node_from_tree +from sphinx_needs.views import NeedsView logger = get_logger(__name__) diff --git a/sphinx_needs/external_needs.py b/sphinx_needs/external_needs.py index 5182adaa1..7a596ac1a 100644 --- a/sphinx_needs/external_needs.py +++ b/sphinx_needs/external_needs.py @@ -167,7 +167,7 @@ def load_external_needs(app: Sphinx, env: BuildEnvironment, docname: str) -> Non # check if external needs already exist ext_need_id = need_params["id"] - need = SphinxNeedsData(env).get_needs_view().get(ext_need_id) + need = SphinxNeedsData(env).get_needs_mutable().get(ext_need_id) if need is not None: # check need_params for more detail diff --git a/sphinx_needs/filter_common.py b/sphinx_needs/filter_common.py index 447223ff8..63178d451 100644 --- a/sphinx_needs/filter_common.py +++ b/sphinx_needs/filter_common.py @@ -5,10 +5,11 @@ from __future__ import annotations +import ast import re from timeit import default_timer as timer from types import CodeType -from typing import Any, Iterable, TypedDict, TypeVar +from typing import Any, Iterable, TypedDict, overload from docutils import nodes from docutils.parsers.rst import directives @@ -21,15 +22,13 @@ NeedsFilteredBaseType, NeedsInfoType, NeedsMutable, - NeedsPartsView, - NeedsView, SphinxNeedsData, ) from sphinx_needs.debug import measure_time, measure_time_func from sphinx_needs.logging import log_warning -from sphinx_needs.roles.need_part import iter_need_parts from sphinx_needs.utils import check_and_get_external_filter_func from sphinx_needs.utils import logger as log +from sphinx_needs.views import NeedsAndPartsListView, NeedsView class FilterAttributesType(TypedDict): @@ -123,16 +122,10 @@ def process_filters( """ start = timer() needs_config = NeedsSphinxConfig(app.config) - found_needs: list[NeedsInfoType] # check if include external needs if not include_external: - needs_view = NeedsView( - {id: need for id, need in needs_view.items() if not need["is_external"]} - ) - - # Add all need_parts of given needs to the search list - all_needs_incl_parts = expand_needs_view(needs_view) + needs_view = needs_view.filter_is_external(False) # Check if external filter code is defined try: @@ -146,73 +139,43 @@ def process_filters( ) return [] - filter_code = None - # Get filter_code from - if not filter_code and filter_data["filter_code"]: - filter_code = "\n".join(filter_data["filter_code"]) + filter_code = ( + "\n".join(filter_data["filter_code"]) if filter_data["filter_code"] else None + ) + + found_needs: list[NeedsInfoType] = [] if (not filter_code or filter_code.isspace()) and not ff_result: - if bool(filter_data["status"] or filter_data["tags"] or filter_data["types"]): - found_needs_by_options: list[NeedsInfoType] = [] - for need_info in all_needs_incl_parts: - status_filter_passed = False - if ( - not filter_data["status"] - or need_info["status"] - and need_info["status"] in filter_data["status"] - ): - # Filtering for status was not requested or match was found - status_filter_passed = True - - tags_filter_passed = False - if ( - len(set(need_info["tags"]) & set(filter_data["tags"])) > 0 - or len(filter_data["tags"]) == 0 - ): - tags_filter_passed = True - - type_filter_passed = False - if ( - need_info["type"] in filter_data["types"] - or need_info["type_name"] in filter_data["types"] - or len(filter_data["types"]) == 0 - ): - type_filter_passed = True - - if status_filter_passed and tags_filter_passed and type_filter_passed: - found_needs_by_options.append(need_info) - # Get need by filter string - found_needs_by_string = filter_needs_parts( - all_needs_incl_parts, - needs_config, - filter_data["filter"], - location=location, - ) - # Make an intersection of both lists - found_needs = intersection_of_need_results( - found_needs_by_options, found_needs_by_string - ) - else: - # There is no other config as the one for filter string. - # So we only need this result. - found_needs = filter_needs_parts( - all_needs_incl_parts, - needs_config, - filter_data["filter"], - location=location, + # TODO these may not be correct for parts + filtered_needs = needs_view + if filter_data["status"]: + filtered_needs = filtered_needs.filter_statuses(filter_data["status"]) + if filter_data["tags"]: + filtered_needs = filtered_needs.filter_has_tag(filter_data["tags"]) + if filter_data["types"]: + filtered_needs = filtered_needs.filter_types( + filter_data["types"], or_type_names=True ) + + # Get need by filter string + found_needs = filter_needs_parts( + filtered_needs.to_list_with_parts(), + needs_config, + filter_data["filter"], + location=location, + ) else: # The filter results may be dirty, as it may continue manipulated needs. found_dirty_needs: list[NeedsInfoType] = [] if filter_code: # code from content # TODO better context type - context: dict[str, list[NeedsInfoType]] = { - "needs": all_needs_incl_parts, # type: ignore[dict-item] - "results": [], + context: dict[str, NeedsAndPartsListView] = { + "needs": needs_view.to_list_with_parts(), + "results": [], # type: ignore[dict-item] } exec(filter_code, context) - found_dirty_needs = context["results"] + found_dirty_needs = context["results"] # type: ignore[assignment] elif ff_result: # code from external file args = [] if ff_result.args: @@ -224,7 +187,9 @@ def process_filters( ff_result.func, category="filter_func", source="user" ) filter_func( - needs=all_needs_incl_parts, results=found_dirty_needs, **args_context + needs=needs_view.to_list_with_parts(), + results=found_dirty_needs, + **args_context, ) else: log_warning( @@ -232,15 +197,13 @@ def process_filters( ) return [] - found_needs = [] - # Check if config allow unsafe filters if needs_config.allow_unsafe_filters: found_needs = found_dirty_needs else: # Just take the ids from search result and use the related, but original need found_need_ids = [x["id_complete"] for x in found_dirty_needs] - for need in all_needs_incl_parts: + for need in needs_view.to_list_with_parts(): if need["id_complete"] in found_need_ids: found_needs.append(need) @@ -278,26 +241,6 @@ def process_filters( return found_needs -def expand_needs_view(needs_view: NeedsView) -> NeedsPartsView: - """Turns a needs view into a sequence of needs, - expanding all ``need["parts"]`` to be items of the list. - """ - all_needs_incl_parts = [] - for need in needs_view.values(): - all_needs_incl_parts.append(need) - for need_part in iter_need_parts(need): - all_needs_incl_parts.append(need_part) - - return NeedsPartsView(all_needs_incl_parts) - - -T = TypeVar("T") - - -def intersection_of_need_results(list_a: list[T], list_b: list[T]) -> list[T]: - return [a for a in list_a if a in list_b] - - def filter_needs_mutable( needs: NeedsMutable, config: NeedsSphinxConfig, @@ -317,6 +260,114 @@ def filter_needs_mutable( ) +@overload +def _analyze_and_apply_expr( + needs: NeedsView, expr: ast.expr +) -> tuple[NeedsView, bool]: ... + + +@overload +def _analyze_and_apply_expr( + needs: NeedsAndPartsListView, expr: ast.expr +) -> tuple[NeedsAndPartsListView, bool]: ... + + +def _analyze_and_apply_expr( + needs: NeedsView | NeedsAndPartsListView, expr: ast.expr +) -> tuple[NeedsView | NeedsAndPartsListView, bool]: + """Analyze the expr for known filter patterns, + and apply them to the given needs. + + :returns: the needs (potentially filtered), + and a boolean denoting if it still requires python eval filtering + """ + if isinstance(expr, (ast.Str, ast.Constant)): + if isinstance(expr.s, (str, bool)): + # "value" / True / False + return needs if expr.s else needs.filter_ids([]), False + + elif isinstance(expr, ast.Name): + # x + if expr.id == "is_external": + return needs.filter_is_external(True), False + + elif isinstance(expr, ast.Compare): + # + if len(expr.ops) == 1 and isinstance(expr.ops[0], ast.Eq): + # x == y + if ( + isinstance(expr.left, ast.Name) + and len(expr.comparators) == 1 + and isinstance(expr.comparators[0], (ast.Str, ast.Constant)) + ): + # x == "value" + field = expr.left.id + value = expr.comparators[0].s + elif ( + isinstance(expr.left, (ast.Str, ast.Constant)) + and len(expr.comparators) == 1 + and isinstance(expr.comparators[0], ast.Name) + ): + # "value" == x + field = expr.comparators[0].id + value = expr.left.s + else: + return needs, True + + if field == "id": + # id == value + return needs.filter_ids([value]), False + elif field == "type": + # type == value + return needs.filter_types([value]), False + elif field == "status": + # status == value + return needs.filter_statuses([value]), False + elif field == "is_external": + # is_external == value + return needs.filter_is_external(value), False + + elif len(expr.ops) == 1 and isinstance(expr.ops[0], ast.In): + # in + if ( + isinstance(expr.left, ast.Name) + and len(expr.comparators) == 1 + and isinstance(expr.comparators[0], (ast.List, ast.Tuple, ast.Set)) + and all( + isinstance(elt, (ast.Str, ast.Constant)) + for elt in expr.comparators[0].elts + ) + ): + values = [elt.s for elt in expr.comparators[0].elts] # type: ignore[attr-defined] + if expr.left.id == "id": + # id in ["a", "b", ...] + return needs.filter_ids(values), False + if expr.left.id == "status": + # status in ["a", "b", ...] + return needs.filter_statuses(values), False + elif expr.left.id == "type": + # type in ["a", "b", ...] + return needs.filter_types(values), False + elif ( + isinstance(expr.left, (ast.Str, ast.Constant)) + and len(expr.comparators) == 1 + and isinstance(expr.comparators[0], ast.Name) + and expr.comparators[0].id == "tags" + ): + # "value" in tags + return needs.filter_has_tag([expr.left.s]), False + + elif isinstance((and_op := expr), ast.BoolOp) and isinstance(and_op.op, ast.And): + # x and y and ... + requires_eval = False + for operand in and_op.values: + needs, _requires_eval = _analyze_and_apply_expr(needs, operand) + requires_eval |= _requires_eval + return needs, requires_eval + + return needs, True + + def filter_needs_view( needs: NeedsView, config: NeedsSphinxConfig, @@ -325,7 +376,27 @@ def filter_needs_view( *, location: tuple[str, int | None] | nodes.Node | None = None, append_warning: str = "", + strict_eval: bool = False, ) -> list[NeedsInfoType]: + if not filter_string: + return list(needs.values()) + + try: + body = ast.parse(filter_string).body + except Exception: + pass # warning already emitted in filter_needs + else: + if len(body) == 1 and isinstance((expr := body[0]), ast.Expr): + needs, requires_eval = _analyze_and_apply_expr(needs, expr.value) + if not requires_eval: + return list(needs.values()) + + if strict_eval: + # this is mainly used for testing purposes, to check if expression analysis is working + raise RuntimeError( + f"Strict eval mode, but no simple filter found: {filter_string!r}" + ) + return filter_needs( needs.values(), config, @@ -337,14 +408,34 @@ def filter_needs_view( def filter_needs_parts( - needs: NeedsPartsView, + needs: NeedsAndPartsListView, config: NeedsSphinxConfig, filter_string: None | str = "", current_need: NeedsInfoType | None = None, *, location: tuple[str, int | None] | nodes.Node | None = None, append_warning: str = "", + strict_eval: bool = False, ) -> list[NeedsInfoType]: + if not filter_string: + return list(needs) + + try: + body = ast.parse(filter_string).body + except Exception: + pass # warning already emitted in filter_needs + else: + if len(body) == 1 and isinstance((expr := body[0]), ast.Expr): + needs, requires_eval = _analyze_and_apply_expr(needs, expr.value) + if not requires_eval: + return list(needs) + + if strict_eval: + # this is mainly used for testing purposes, to check if expression analysis is working + raise RuntimeError( + f"Strict eval mode, but no simple filter found: {filter_string!r}" + ) + return filter_needs( needs, config, diff --git a/sphinx_needs/functions/common.py b/sphinx_needs/functions/common.py index 5bd1e1787..6209b9677 100644 --- a/sphinx_needs/functions/common.py +++ b/sphinx_needs/functions/common.py @@ -14,19 +14,20 @@ from sphinx_needs.api.exceptions import NeedsInvalidFilter from sphinx_needs.config import NeedsSphinxConfig -from sphinx_needs.data import NeedsInfoType, NeedsView +from sphinx_needs.data import NeedsInfoType, NeedsMutable from sphinx_needs.filter_common import ( - filter_needs_view, + filter_needs, filter_single_need, ) from sphinx_needs.logging import log_warning from sphinx_needs.utils import logger +from sphinx_needs.views import NeedsView def test( app: Sphinx, need: NeedsInfoType | None, - needs: NeedsView, + needs: NeedsMutable | NeedsView, *args: Any, **kwargs: Any, ) -> str: @@ -50,7 +51,7 @@ def test( def echo( app: Sphinx, need: NeedsInfoType | None, - needs: NeedsView, + needs: NeedsMutable | NeedsView, text: str, *args: Any, **kwargs: Any, @@ -72,7 +73,7 @@ def echo( def copy( app: Sphinx, need: NeedsInfoType | None, - needs: NeedsView, + needs: NeedsMutable | NeedsView, option: str, need_id: str | None = None, lower: bool = False, @@ -141,14 +142,15 @@ def copy( need = needs[need_id] if filter: - result = filter_needs_view( - needs, + location = ( + (need["docname"], need["lineno"]) if need and need["docname"] else None + ) + result = filter_needs( + needs.values(), NeedsSphinxConfig(app.config), filter, need, - location=(need["docname"], need["lineno"]) - if need and need["docname"] - else None, + location=location, ) if result: need = result[0] @@ -172,7 +174,7 @@ def copy( def check_linked_values( app: Sphinx, need: NeedsInfoType | None, - needs: NeedsView, + needs: NeedsMutable | NeedsView, result: Any, search_option: str, search_value: Any, @@ -301,7 +303,7 @@ def check_linked_values( def calc_sum( app: Sphinx, need: NeedsInfoType | None, - needs: NeedsView, + needs: NeedsMutable | NeedsView, option: str, filter: str | None = None, links_only: bool = False, @@ -402,7 +404,7 @@ def calc_sum( def links_from_content( app: Sphinx, need: NeedsInfoType | None, - needs: NeedsView, + needs: NeedsMutable | NeedsView, need_id: str | None = None, filter: str | None = None, ) -> list[str]: diff --git a/sphinx_needs/functions/functions.py b/sphinx_needs/functions/functions.py index a849e87cb..cd7ce4c91 100644 --- a/sphinx_needs/functions/functions.py +++ b/sphinx_needs/functions/functions.py @@ -19,12 +19,13 @@ from sphinx.util.tags import Tags from sphinx_needs.config import NeedsSphinxConfig -from sphinx_needs.data import NeedsInfoType, NeedsMutable, NeedsView, SphinxNeedsData +from sphinx_needs.data import NeedsInfoType, NeedsMutable, SphinxNeedsData from sphinx_needs.debug import measure_time_func from sphinx_needs.logging import get_logger, log_warning from sphinx_needs.nodes import Need from sphinx_needs.roles.need_func import NeedFunc from sphinx_needs.utils import NEEDS_FUNCTIONS, match_variants +from sphinx_needs.views import NeedsView logger = get_logger(__name__) unicode = str @@ -40,7 +41,7 @@ def __call__( self, app: Sphinx, need: NeedsInfoType | None, - needs: NeedsView, + needs: NeedsView | NeedsMutable, *args: Any, **kwargs: Any, ) -> str | int | float | list[str] | list[int] | list[float] | None: ... @@ -78,6 +79,7 @@ def register_func(need_function: DynamicFunction, name: str | None = None) -> No def execute_func( app: Sphinx, need: NeedsInfoType | None, + needs: NeedsView | NeedsMutable, func_string: str, location: str | tuple[str | None, int | None] | nodes.Node | None, ) -> str | int | float | list[str] | list[int] | list[float] | None: @@ -118,7 +120,7 @@ def execute_func( func_return = func( app, need, - SphinxNeedsData(app.env).get_needs_view(), + needs, *func_args, **func_kwargs, ) @@ -201,7 +203,9 @@ def find_and_replace_node_content( msg = f"The [[{func_string}]] syntax in need content is deprecated. Replace with :ndf:`{func_string}` instead." log_warning(logger, msg, "deprecation", location=node) - func_return = execute_func(env.app, need, func_string, node) + func_return = execute_func( + env.app, need, SphinxNeedsData(env).get_needs_view(), func_string, node + ) if isinstance(func_return, list): func_return = ", ".join(str(el) for el in func_return) @@ -266,7 +270,7 @@ def resolve_dynamic_values(needs: NeedsMutable, app: Sphinx) -> None: while func_call: try: func_call, func_return = _detect_and_execute_field( - need[need_option], need, app + need[need_option], need, needs, app ) except FunctionParsingException: raise SphinxError( @@ -298,7 +302,7 @@ def resolve_dynamic_values(needs: NeedsMutable, app: Sphinx) -> None: for element in need[need_option]: try: func_call, func_return = _detect_and_execute_field( - element, need, app + element, need, needs, app ) except FunctionParsingException: raise SphinxError( @@ -401,7 +405,7 @@ def check_and_get_content( func_call = func_match.group(1) # Extract function call func_return = execute_func( - env.app, need, func_call, location + env.app, need, SphinxNeedsData(env).get_needs_view(), func_call, location ) # Execute function call and get return value if isinstance(func_return, list): @@ -415,13 +419,10 @@ def check_and_get_content( def _detect_and_execute_field( - content: Any, need: NeedsInfoType, app: Sphinx + content: Any, need: NeedsInfoType, needs: NeedsMutable, app: Sphinx ) -> tuple[str | None, str | int | float | list[str] | list[int] | list[float] | None]: """Detects if given need field value is a function call and executes it.""" - try: - content = str(content) - except UnicodeEncodeError: - content = content.encode("utf-8") + content = str(content) func_match = FUNC_RE.search(content) if func_match is None: @@ -431,6 +432,7 @@ def _detect_and_execute_field( func_return = execute_func( app, need, + needs, func_call, (need["docname"], need["lineno"]) if need["docname"] else None, ) # Execute function call and get return value diff --git a/sphinx_needs/needs.py b/sphinx_needs/needs.py index 44051b063..65a301503 100644 --- a/sphinx_needs/needs.py +++ b/sphinx_needs/needs.py @@ -278,6 +278,10 @@ def setup(app: Sphinx) -> dict[str, Any]: # This should be called last, so that need-styles can override styles from used libraries app.connect("env-updated", install_styles_static_files) + # emitted during post_process_needs_data, both are passed the mutable needs dict + app.add_event("needs-before-post-processing") + app.add_event("needs-before-sealing") + # There is also the event doctree-read. # But it looks like in this event no references are already solved, which # makes trouble in our code. @@ -503,7 +507,6 @@ def prepare_env(app: Sphinx, env: BuildEnvironment, _docname: str) -> None: """ needs_config = NeedsSphinxConfig(app.config) data = SphinxNeedsData(env) - data.get_needs_view() data.get_or_create_filters() data.get_or_create_docs() services = data.get_or_create_services() diff --git a/sphinx_needs/roles/need_count.py b/sphinx_needs/roles/need_count.py index 90e910504..2bb861f74 100644 --- a/sphinx_needs/roles/need_count.py +++ b/sphinx_needs/roles/need_count.py @@ -12,10 +12,7 @@ from sphinx_needs.api.exceptions import NeedsInvalidFilter from sphinx_needs.config import NeedsSphinxConfig from sphinx_needs.data import SphinxNeedsData -from sphinx_needs.filter_common import ( - expand_needs_view, - filter_needs_parts, -) +from sphinx_needs.filter_common import filter_needs_parts from sphinx_needs.logging import get_logger log = get_logger(__name__) @@ -39,7 +36,7 @@ def process_need_count( if filter: filters = filter.split(" ? ") if len(filters) == 1: - need_list = expand_needs_view(needs_view) # adds parts to need_list + need_list = needs_view.to_list_with_parts() amount = str( len( filter_needs_parts( @@ -51,7 +48,7 @@ def process_need_count( ) ) elif len(filters) == 2: - need_list = expand_needs_view(needs_view) # adds parts to need_list + need_list = needs_view.to_list_with_parts() amount_1 = len( filter_needs_parts( need_list, needs_config, filters[0], location=node_need_count diff --git a/sphinx_needs/roles/need_func.py b/sphinx_needs/roles/need_func.py index 309f883d3..d5d09ae13 100644 --- a/sphinx_needs/roles/need_func.py +++ b/sphinx_needs/roles/need_func.py @@ -9,7 +9,7 @@ from sphinx.environment import BuildEnvironment from sphinx.util.docutils import SphinxRole -from sphinx_needs.data import NeedsInfoType +from sphinx_needs.data import NeedsInfoType, SphinxNeedsData from sphinx_needs.logging import get_logger, log_warning from sphinx_needs.utils import add_doc @@ -60,7 +60,13 @@ def get_text(self, env: BuildEnvironment, need: NeedsInfoType | None) -> nodes.T from sphinx_needs.functions.functions import check_and_get_content, execute_func if not self.with_brackets: - func_return = execute_func(env.app, need, self.astext(), self) + func_return = execute_func( + env.app, + need, + SphinxNeedsData(env).get_needs_view(), + self.astext(), + self, + ) if isinstance(func_return, list): func_return = ", ".join(str(el) for el in func_return) diff --git a/sphinx_needs/roles/need_part.py b/sphinx_needs/roles/need_part.py index b948f13ba..c4b9e660d 100644 --- a/sphinx_needs/roles/need_part.py +++ b/sphinx_needs/roles/need_part.py @@ -18,7 +18,7 @@ from sphinx.environment import BuildEnvironment from sphinx.util.nodes import make_refnode -from sphinx_needs.data import NeedsInfoType +from sphinx_needs.data import NeedsInfoType, NeedsPartType from sphinx_needs.logging import get_logger, log_warning from sphinx_needs.nodes import Need @@ -41,6 +41,16 @@ def process_need_part( part_pattern = re.compile(r"\(([\w-]+)\)(.*)", re.DOTALL) +def create_need_from_part(need: NeedsInfoType, part: NeedsPartType) -> NeedsInfoType: + """Create a full need from a part and its parent need.""" + full_part: NeedsInfoType = {**need, **part} + full_part["id_complete"] = f"{need['id']}.{part['id']}" + full_part["id_parent"] = need["id"] + full_part["is_need"] = False + full_part["is_part"] = True + return full_part + + def iter_need_parts(need: NeedsInfoType) -> Iterable[NeedsInfoType]: """Yield all parts, a.k.a sub-needs, from a need. @@ -48,13 +58,7 @@ def iter_need_parts(need: NeedsInfoType) -> Iterable[NeedsInfoType]: and overrides the content of the parent need. """ for part in need["parts"].values(): - full_part: NeedsInfoType = {**need, **part} - full_part["id_complete"] = f"{need['id']}.{part['id']}" - full_part["id_parent"] = need["id"] - full_part["is_need"] = False - full_part["is_part"] = True - - yield full_part + yield create_need_from_part(need, part) def update_need_with_parts( diff --git a/sphinx_needs/utils.py b/sphinx_needs/utils.py index 911328f36..63ae9db88 100644 --- a/sphinx_needs/utils.py +++ b/sphinx_needs/utils.py @@ -17,9 +17,10 @@ from sphinx_needs.api.exceptions import NeedsInvalidFilter from sphinx_needs.config import LinkOptionsType, NeedsSphinxConfig -from sphinx_needs.data import NeedsInfoType, NeedsPartsView, NeedsView, SphinxNeedsData +from sphinx_needs.data import NeedsInfoType, SphinxNeedsData from sphinx_needs.defaults import NEEDS_PROFILING from sphinx_needs.logging import get_logger, log_warning +from sphinx_needs.views import NeedsAndPartsListView, NeedsView try: from typing import TypedDict @@ -315,7 +316,7 @@ class FilterFunc(Protocol): def __call__( self, *, - needs: NeedsPartsView, + needs: NeedsAndPartsListView, results: list[Any], **kwargs: str, ) -> None: ... diff --git a/sphinx_needs/views.py b/sphinx_needs/views.py new file mode 100644 index 000000000..122c57899 --- /dev/null +++ b/sphinx_needs/views.py @@ -0,0 +1,402 @@ +from __future__ import annotations + +from itertools import chain +from typing import TYPE_CHECKING, Iterable, Iterator, List, Mapping, Optional, Tuple + +if TYPE_CHECKING: + from sphinx_needs.data import NeedsInfoType + + +_IdSet = List[Tuple[str, Optional[str]]] +"""Set of (need, part) ids.""" + + +class _Indexes: + """Indexes of common fields for fast filtering of needs.""" + + __slots__ = ( + "is_external", + "status", + "tags", + "types", + "type_names", + "parts_to_needs", + ) + + def __init__( + self, + *, + is_external: dict[bool, _IdSet], + types: dict[str, _IdSet], + type_names: dict[str, _IdSet], + status: dict[str | None, _IdSet], + tags: dict[str, _IdSet], + parts_to_needs: dict[str, list[str]], + ) -> None: + self.is_external = is_external + """Mapping of `is_external` values to (need, part) ids.""" + self.types = types + """Mapping of `type` values to (need, part) ids.""" + self.type_names = type_names + """Mapping of `type_name` values to (need, part) ids.""" + self.status = status + """Mapping of `status` values to (need, part) ids.""" + self.tags = tags + """Mapping of `tags` values to (need, part) ids that contain them.""" + self.parts_to_needs = parts_to_needs + """Mapping of part ids to the needs that contain them.""" + + +class _LazyIndexes: + """A lazily computed view of indexes for needs.""" + + __slots__ = ("_needs", "_indexes") + + def __init__(self, needs: Mapping[str, NeedsInfoType]) -> None: + self._needs = needs + self._indexes: _Indexes | None = None + + @property + def needs(self) -> Mapping[str, NeedsInfoType]: + """Get the needs.""" + return self._needs + + @property + def indexes(self) -> _Indexes: + """Get the indexes, computing them if necessary.""" + if self._indexes is None: + self._indexes = self._compute() + return self._indexes + + def _compute(self) -> _Indexes: + """Lazily compute the indexes for the needs, when first requested.""" + _idx_is_external: dict[bool, _IdSet] = {} + _idx_types: dict[str, _IdSet] = {} + _idx_type_names: dict[str, _IdSet] = {} + _idx_status: dict[str | None, _IdSet] = {} + _idx_tags: dict[str, _IdSet] = {} + _idx_parts_to_needs: dict[str, list[str]] = {} + + for id, need in self._needs.items(): + _idx_is_external.setdefault(need["is_external"], []).append((id, None)) + _idx_types.setdefault(need["type"], []).append((id, None)) + _idx_type_names.setdefault(need["type_name"], []).append((id, None)) + _idx_status.setdefault(need["status"], []).append((id, None)) + for tag in need["tags"]: + _idx_tags.setdefault(tag, []).append((id, None)) + + for part_id, part in need["parts"].items(): + _idx_parts_to_needs.setdefault(part_id, []).append(id) + # In principle, parts should not have the fields below (and thus should be copied from the need), + # but there is currently no validation for this, so we also record them. + _idx_is_external.setdefault( + part.get("is_external", need["is_external"]), # type: ignore[arg-type] + [], + ).append((id, part_id)) + _idx_types.setdefault( + part.get("type", need["type"]), # type: ignore[arg-type] + [], + ).append((id, part_id)) + _idx_type_names.setdefault( + part.get("type_name", need["type_name"]), # type: ignore[arg-type] + [], + ).append((id, part_id)) + _idx_status.setdefault( + part.get("status", need["status"]), # type: ignore[arg-type] + [], + ).append((id, part_id)) + for tag in part.get("tags", need["tags"]): # type: ignore[attr-defined] + _idx_tags.setdefault(tag, []).append((id, part_id)) + + return _Indexes( + is_external=_idx_is_external, + types=_idx_types, + type_names=_idx_type_names, + status=_idx_status, + tags=_idx_tags, + parts_to_needs=_idx_parts_to_needs, + ) + + +class NeedsView(Mapping[str, "NeedsInfoType"]): + """A read-only view of needs, mapping need ids to need data, + with "fast" filtering methods. + + The needs are read-only and fully resolved + (e.g. dynamic values and back links have been computed etc) + """ + + __slots__ = ("_indexes", "_selected_ids", "_maybe_len") + + @classmethod + def _from_needs(cls, needs: Mapping[str, NeedsInfoType], /) -> NeedsView: + """Create a new view of needs from a mapping of needs.""" + return cls(_indexes=_LazyIndexes(needs), _selected_ids=None) + + def __init__( + self, + *, + _indexes: _LazyIndexes, + _selected_ids: dict[str, None] | None, + ) -> None: + """Create a new view of needs, + + .. important:: This class is not meant to be instantiated by users, + a singleton instance is managed by sphinx-needs. + + Instead, use the filter methods to create new views. + """ + self._indexes = _indexes + self._selected_ids = ( + _selected_ids # note we use a dict here like an ordered set + ) + # Cache the length of the needs when first requested + self._maybe_len: int | None = None + + @property + def _all_needs(self) -> Mapping[str, NeedsInfoType]: + return self._indexes.needs + + def __getitem__(self, key: str) -> NeedsInfoType: + if self._selected_ids is not None and key not in self._selected_ids: + raise KeyError(key) + return self._all_needs[key] + + def __iter__(self) -> Iterator[str]: + if self._selected_ids is None: + yield from self._all_needs + else: + for id in self._selected_ids: + if id in self._all_needs: + yield id + + def __len__(self) -> int: + if self._maybe_len is None: + self._maybe_len = sum(1 for _ in self) + return self._maybe_len + + def __bool__(self) -> bool: + try: + next(iter(self)) + except StopIteration: + return False + return True + + def to_list_with_parts(self) -> NeedsAndPartsListView: + """Create a new view with needs and parts.""" + if self._selected_ids is None: + return NeedsAndPartsListView(_indexes=self._indexes, _selected_ids=None) + _selected_ids: dict[tuple[str, str | None], None] = {} + for id in self._selected_ids: + if need := self._all_needs.get(id): + _selected_ids[(id, None)] = None + for part_id in need["parts"]: + _selected_ids[(id, part_id)] = None + return NeedsAndPartsListView( + _indexes=self._indexes, _selected_ids=_selected_ids + ) + + def _copy_filtered(self, ids: Iterable[tuple[str, str | None]]) -> NeedsView: + """Create a new view with only the needs with the given ids. + + This is a helper method for the filter methods, + it ensures that the lazy indexing is copied over, + so that they are not recomputed. + """ + selected_ids = {n: None for n, p in ids if p is None and n in self._all_needs} + if self._selected_ids is not None: + selected_ids = {n: None for n in self._selected_ids if n in selected_ids} + return NeedsView(_indexes=self._indexes, _selected_ids=selected_ids) + + def filter_ids(self, values: Iterable[str]) -> NeedsView: + """Create new view with needs filtered by the ``id`` field.""" + return self._copy_filtered((v, None) for v in values) + + def filter_is_external(self, value: bool) -> NeedsView: + """Create new view with only needs where ``is_external`` field is true/false.""" + return self._copy_filtered(self._indexes.indexes.is_external.get(value, [])) + + def filter_types(self, values: list[str], or_type_names: bool = False) -> NeedsView: + """Create new view with only needs that have certain ``type`` field values. + + :param values: List of types to filter by. + :param or_type_names: If True, filter by both ``type`` and ``type_name`` field + """ + if or_type_names: + return self._copy_filtered( + i + for value in values + for i in chain( + self._indexes.indexes.types.get(value, []), + self._indexes.indexes.type_names.get(value, []), + ) + ) + return self._copy_filtered( + i for value in values for i in self._indexes.indexes.types.get(value, []) + ) + + def filter_statuses(self, values: list[str]) -> NeedsView: + """Create new view with only needs that have certain ``status`` field values.""" + return self._copy_filtered( + i for value in values for i in self._indexes.indexes.status.get(value, []) + ) + + def filter_has_tag(self, values: list[str]) -> NeedsView: + """Create new view with only needs that have at least one of these values in the ``tags`` field list.""" + return self._copy_filtered( + i for value in values for i in self._indexes.indexes.tags.get(value, []) + ) + + +class NeedsAndPartsListView: + """A read-only view of needs and parts, + after resolution (e.g. back links have been computed etc) + + The parts are created by creating a copy of the need for each item in ``parts``, + and then overwriting a subset of fields with the values from the part. + """ + + __slots__ = ("_indexes", "_selected_ids", "_maybe_len") + + def __init__( + self, + *, + _indexes: _LazyIndexes, + _selected_ids: dict[tuple[str, str | None], None] | None, + ): + """Initialize a new view of needs. + + .. important:: This class is not meant to be instantiated by users, + a singleton instance is managed by sphinx-needs. + + Instead, create from a NeedsView instance. + """ + self._indexes = _indexes + self._selected_ids = ( + _selected_ids # note we use a dict here like an ordered set + ) + # Cache the length of the needs when first requested + self._maybe_len: int | None = None + + @property + def _all_needs(self) -> Mapping[str, NeedsInfoType]: + return self._indexes.needs + + def __iter__(self) -> Iterator[NeedsInfoType]: + """Iterate over the needs and parts in the view.""" + from sphinx_needs.roles.need_part import create_need_from_part + + if self._selected_ids is None: + for need in self._all_needs.values(): + yield need + for part in need["parts"].values(): + yield create_need_from_part(need, part) + else: + for id, part_id in self._selected_ids: + if id not in self._all_needs: + continue + if part_id is None: + yield self._all_needs[id] + else: + need = self._all_needs[id] + if part_id in need["parts"]: + yield create_need_from_part(need, need["parts"][part_id]) + + def __bool__(self) -> bool: + try: + next(iter(self)) + except StopIteration: + return False + return True + + def __len__(self) -> int: + if self._maybe_len is None: + self._maybe_len = sum(1 for _ in self) + return self._maybe_len + + def get_need(self, id: str, part_id: str | None = None) -> NeedsInfoType | None: + """Get a need by id, or return None if it does not exist. + + If ``part_id`` is provided, return the part of the need with that id, or None if it does not exist. + """ + from sphinx_needs.roles.need_part import create_need_from_part + + if part_id is None: + if self._selected_ids is None or (id, None) in self._selected_ids: + return self._all_needs.get(id) + else: + if ( + (self._selected_ids is None or (id, part_id) in self._selected_ids) + and (need := self._all_needs.get(id)) + and (part := need["parts"].get(part_id)) + ): + return create_need_from_part(need, part) + + return None + + def _copy_filtered( + self, ids: Iterable[tuple[str, str | None]] + ) -> NeedsAndPartsListView: + """Create a new view with only the needs/parts with the given ids.""" + if self._selected_ids is None: + selected_ids = {n: None for n in ids} + else: + selected_ids = {n: None for n in ids if n in self._selected_ids} + return NeedsAndPartsListView(_indexes=self._indexes, _selected_ids=selected_ids) + + def filter_ids(self, values: Iterable[str]) -> NeedsAndPartsListView: + """Create new view with needs/parts filtered by the ``id`` field.""" + selected_ids: Iterable[tuple[str, str | None]] + if self._selected_ids is None: + # the id can either be a need or a part id + selected_ids = [ + (need_id, None) for need_id in values if need_id in self._all_needs + ] + for part_id in values: + selected_ids.extend( + (need_id, part_id) + for need_id in self._indexes.indexes.parts_to_needs.get(part_id, []) + ) + else: + val_set = set(values) + selected_ids = ( + v for v in self._selected_ids if v[0] in val_set or v[1] in val_set + ) + return self._copy_filtered(selected_ids) + + def filter_is_external(self, value: bool) -> NeedsAndPartsListView: + """Create new view with only needs/parts where ``is_external`` field is true/false.""" + return self._copy_filtered(self._indexes.indexes.is_external.get(value, [])) + + def filter_types( + self, values: list[str], or_type_names: bool = False + ) -> NeedsAndPartsListView: + """Create new view with only needs/parts that have certain ``type`` field values. + + :param values: List of types to filter by. + :param or_type_names: If True, filter by both ``type`` and ``type_name`` field + """ + if or_type_names: + return self._copy_filtered( + i + for value in values + for i in chain( + self._indexes.indexes.types.get(value, []), + self._indexes.indexes.type_names.get(value, []), + ) + ) + return self._copy_filtered( + i for value in values for i in self._indexes.indexes.types.get(value, []) + ) + + def filter_statuses(self, values: list[str]) -> NeedsAndPartsListView: + """Create new view with only needs/parts that have certain ``status`` field values.""" + return self._copy_filtered( + i for value in values for i in self._indexes.indexes.status.get(value, []) + ) + + def filter_has_tag(self, values: list[str]) -> NeedsAndPartsListView: + """Create new view with only needs/parts that have at least one of these values in the ``tags`` field list.""" + return self._copy_filtered( + i for value in values for i in self._indexes.indexes.tags.get(value, []) + ) diff --git a/sphinx_needs/warnings.py b/sphinx_needs/warnings.py index f26f12daa..796c727f0 100644 --- a/sphinx_needs/warnings.py +++ b/sphinx_needs/warnings.py @@ -9,7 +9,7 @@ from sphinx.util import logging from sphinx_needs.config import NEEDS_CONFIG, NeedsSphinxConfig -from sphinx_needs.data import NeedsView, SphinxNeedsData +from sphinx_needs.data import SphinxNeedsData from sphinx_needs.filter_common import filter_needs_view from sphinx_needs.logging import get_logger, log_warning @@ -51,9 +51,7 @@ def process_warnings(app: Sphinx, exception: Exception | None) -> None: env.needs_warnings_executed = True # type: ignore[attr-defined] # Exclude external needs for warnings check - needs_view = NeedsView( - {id: need for id, need in needs_view.items() if not need["is_external"]} - ) + needs_view = needs_view.filter_is_external(False) needs_config = NeedsSphinxConfig(app.config) warnings_always_warn = needs_config.warnings_always_warn diff --git a/tests/test_filter.py b/tests/test_filter.py index a33fb29c7..719354528 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,9 +1,13 @@ import os from pathlib import Path +from unittest.mock import Mock import pytest from sphinx.util.console import strip_colors +from sphinx_needs.filter_common import filter_needs_parts, filter_needs_view +from sphinx_needs.views import NeedsView + @pytest.mark.parametrize( "test_app", @@ -104,3 +108,172 @@ def test_filter_build_html(test_app): assert '