diff --git a/tests/test_overrides.py b/tests/test_overrides.py index 07d13375..a157e29d 100644 --- a/tests/test_overrides.py +++ b/tests/test_overrides.py @@ -1,14 +1,14 @@ import pytest from url_matcher import Patterns -from tests.po_lib import POTopLevel1, POTopLevel2, POTopLevelOverriden2 +from tests.po_lib import POTopLevel1, POTopLevel2, POTopLevelOverriden2, secondary_registry from tests.po_lib.a_module import POModule from tests.po_lib.nested_package import PONestedPkg from tests.po_lib.nested_package.a_nested_module import ( PONestedModule, PONestedModuleOverridenSecondary, ) -from web_poet.overrides import find_page_object_overrides, PageObjectRegistry +from web_poet.overrides import PageObjectRegistry, default_registry POS = {POTopLevel1, POTopLevel2, POModule, PONestedPkg, PONestedModule} @@ -16,7 +16,7 @@ def test_list_page_objects_from_pkg(): """Tests that metadata is extracted properly from the po_lib package""" - rules = find_page_object_overrides("tests.po_lib") + rules = default_registry.get_overrides_from_module("tests.po_lib") assert {po.use for po in rules} == POS for rule in rules: @@ -26,7 +26,7 @@ def test_list_page_objects_from_pkg(): def test_list_page_objects_from_module(): - rules = find_page_object_overrides("tests.po_lib.a_module") + rules = default_registry.get_overrides_from_module("tests.po_lib.a_module") assert len(rules) == 1 rule = rules[0] assert rule.use == POModule @@ -35,22 +35,22 @@ def test_list_page_objects_from_module(): def test_list_page_objects_from_empty_module(): - rules = find_page_object_overrides("tests.po_lib.an_empty_module") + rules = default_registry.get_overrides_from_module("tests.po_lib.an_empty_module") assert len(rules) == 0 def test_list_page_objects_from_empty_pkg(): - rules = find_page_object_overrides("tests.po_lib.an_empty_package") + rules = default_registry.get_overrides_from_module("tests.po_lib.an_empty_package") assert len(rules) == 0 def test_list_page_objects_from_unknown_module(): with pytest.raises(ImportError): - find_page_object_overrides("tests.po_lib.unknown_module") + default_registry.get_overrides_from_module("tests.po_lib.unknown_module") def test_list_page_objects_from_imported_registry(): - rules = find_page_object_overrides("tests.po_lib", registry_name="secondary") + rules = secondary_registry.get_overrides_from_module("tests.po_lib") assert len(rules) == 2 rule_for = {po.use: po for po in rules} @@ -63,16 +63,8 @@ def test_list_page_objects_from_imported_registry(): assert pones.instead_of == PONestedModuleOverridenSecondary -def test_list_page_objects_from_non_existing_registry(): - assert find_page_object_overrides("tests.po_lib", registry_name="not-exist") == [] - def test_cmd(): from web_poet.__main__ import main assert main(["tests.po_lib"]) is None - - -def test_registry_repr(): - registry = PageObjectRegistry(name="test") - assert "name='test'" in str(registry) diff --git a/web_poet/__init__.py b/web_poet/__init__.py index ee3462bd..f4366d05 100644 --- a/web_poet/__init__.py +++ b/web_poet/__init__.py @@ -1,3 +1,3 @@ from .pages import WebPage, ItemPage, ItemWebPage, Injectable from .page_inputs import ResponseData -from .overrides import handle_urls, find_page_object_overrides, PageObjectRegistry +from .overrides import handle_urls, PageObjectRegistry, default_registry diff --git a/web_poet/__main__.py b/web_poet/__main__.py index fc5fe3cc..16b231b8 100644 --- a/web_poet/__main__.py +++ b/web_poet/__main__.py @@ -1,9 +1,11 @@ +"""Returns all Override Rules from the default registry.""" + import argparse from typing import Callable import tabulate -from web_poet.overrides import find_page_object_overrides +from web_poet import default_registry def qualified_name(cls: Callable) -> str: @@ -20,14 +22,6 @@ def main(args=None): type=str, help="A package or module to list overrides from", ) - parser.add_argument( - "--registry", - "-n", - metavar="REGISTRY_NAME", - type=str, - help="Registry name to list overrides from", - default="default", - ) args = parser.parse_args(args) table = [ ( @@ -48,7 +42,7 @@ def main(args=None): rule.for_patterns.priority, rule.meta, ) - for rule in find_page_object_overrides(args.module, registry_name=args.registry) + for rule in default_registry.get_overrides_from_module(args.module) ] print(tabulate.tabulate(table, headers="firstrow")) diff --git a/web_poet/overrides.py b/web_poet/overrides.py index eb30f7a3..53302a9e 100644 --- a/web_poet/overrides.py +++ b/web_poet/overrides.py @@ -2,29 +2,16 @@ import importlib.util import warnings import pkgutil -import sys from dataclasses import dataclass, field from typing import Iterable, Union, List, Callable, Dict, Any from url_matcher import Patterns -# Used by ``PageObjectRegistry`` to declare itself in a module so that it's -# easily discovered by ``find_page_object_overrides()`` later on. -REGISTRY_MODULE_ANCHOR = "_registry_module_anchor_" - - -@dataclass(frozen=True) -class HandleUrlsSpec: - """Meta information used by the :py:func:`web_poet.handle_urls` decorator""" - - patterns: Patterns - overrides: Callable - meta: Dict[str, Any] = field(default_factory=dict) - @dataclass(frozen=True) class OverrideRule: - """A single override rule. Specify when a page object should be used instead of another""" + """A single override rule that specifies when a page object should be used + instead of another.""" for_patterns: Patterns use: Callable @@ -109,23 +96,7 @@ class ExampleComProductPage(ItemPage): """ def __init__(self, name: str = ""): - self.name = name - self.data: Dict[Callable, HandleUrlsSpec] = {} - - def _declare_registry_in_module(self, cls): - """This allows the Registry to be easily discovered later on by - ``find_page_object_overrides()`` by explicitly declaring its presence - on the given module. - """ - - module = sys.modules[cls.__module__] - if not hasattr(module, REGISTRY_MODULE_ANCHOR): - registries = {self.name: self} - else: - registries = getattr(module, REGISTRY_MODULE_ANCHOR) - registries[self.name] = self - - setattr(module, REGISTRY_MODULE_ANCHOR, registries) + self.data: Dict[Callable, OverrideRule] = {} def handle_urls( self, @@ -157,20 +128,19 @@ def handle_urls( """ def wrapper(cls): - self._declare_registry_in_module(cls) - - spec = HandleUrlsSpec( - patterns=Patterns( + rule = OverrideRule( + for_patterns=Patterns( include=_as_list(include), exclude=_as_list(exclude), priority=priority, ), - overrides=overrides, + use=cls, + instead_of=overrides, meta=kwargs, ) # If it was already defined, we don't want to override it if cls not in self.data: - self.data[cls] = spec + self.data[cls] = rule else: warnings.warn( f"Multiple @handle_urls annotations with the same 'overrides' " @@ -182,22 +152,34 @@ def wrapper(cls): return wrapper - def get_data_from_module(self, module: str) -> Dict[Callable, HandleUrlsSpec]: - """Returns the override mappings that were declared using ``handle_urls`` + # TODO: implement by calling get_overrides_from_module() in the Scrapy proj + def get_overrides(self) -> List[OverrideRule]: + """Returns all override rules that were declared using ``handle_urls`` + inside the current Scrapy project.""" + pass + + def get_overrides_from_module(self, module: str) -> List[OverrideRule]: + """Returns the override rules that were declared using ``handle_urls`` in a specific module. This is useful if you've organized your Page Objects into multiple submodules in your project. """ + rules: Dict[Callable, OverrideRule] = {} + + for mod in walk_modules(module): + # Dict ensures that no duplicates are collected and returned. + rules.update(self._filter_from_module(mod.__name__)) + + return list(rules.values()) + + def _filter_from_module(self, module: str) -> Dict[Callable, OverrideRule]: return { - cls: spec - for cls, spec in self.data.items() + cls: rule + for cls, rule in self.data.items() if cls.__module__.startswith(module) } - def __repr__(self) -> str: - return f"PageObjectRegistry(name='{self.name}')" - # For ease of use, we'll create a default registry so that users can simply # use its `handles_url()` method directly by `from web_poet import handles_url` @@ -225,42 +207,3 @@ def onerror(err): ): mod = importlib.import_module(info.name) yield mod - - -def find_page_object_overrides( - module: str, registry_name: str = "default" -) -> List[OverrideRule]: - """ - Find all the Page Objects overrides in the given module/package and its - submodules. - - The Page Objects that have been decorated with the ``handle_urls`` decorator - from the specified Registry ``name`` will be returned. - - Note that this will explore the `module` and traverse its `submodules`. - - :param module: The module or package to search in - :param registry_name: Only return page objects overrides in this registry - :return: Return a list of :py:class:`web_poet.overrides.OverrideRule` metadata. - """ - - page_objects: Dict[Callable, HandleUrlsSpec] = {} - for mod in walk_modules(module): - handle_urls_dict = getattr(mod, REGISTRY_MODULE_ANCHOR, {}) - - # A module could have multiple non-default PageObjectRegistry instances - registry = handle_urls_dict.get(registry_name) - if not registry: - continue - - page_objects.update(registry.get_data_from_module(mod.__name__)) - - return [ - OverrideRule( - for_patterns=spec.patterns, - use=po, - instead_of=spec.overrides, - meta=spec.meta, - ) - for po, spec in page_objects.items() - ]