Skip to content

Commit

Permalink
WIP: refactor by removing the need for find_page_object_overrides()
Browse files Browse the repository at this point in the history
  • Loading branch information
BurnzZ committed Dec 22, 2021
1 parent 0a0ee12 commit 1efb506
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 111 deletions.
24 changes: 8 additions & 16 deletions tests/test_overrides.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
import pytest
from url_matcher import Patterns

from tests.po_lib import POTopLevel1, POTopLevel2, POTopLevelOverriden2
from tests.po_lib import POTopLevel1, POTopLevel2, POTopLevelOverriden2, secondary_registry
from tests.po_lib.a_module import POModule
from tests.po_lib.nested_package import PONestedPkg
from tests.po_lib.nested_package.a_nested_module import (
PONestedModule,
PONestedModuleOverridenSecondary,
)
from web_poet.overrides import find_page_object_overrides, PageObjectRegistry
from web_poet.overrides import PageObjectRegistry, default_registry


POS = {POTopLevel1, POTopLevel2, POModule, PONestedPkg, PONestedModule}


def test_list_page_objects_from_pkg():
"""Tests that metadata is extracted properly from the po_lib package"""
rules = find_page_object_overrides("tests.po_lib")
rules = default_registry.get_overrides_from_module("tests.po_lib")
assert {po.use for po in rules} == POS

for rule in rules:
Expand All @@ -26,7 +26,7 @@ def test_list_page_objects_from_pkg():


def test_list_page_objects_from_module():
rules = find_page_object_overrides("tests.po_lib.a_module")
rules = default_registry.get_overrides_from_module("tests.po_lib.a_module")
assert len(rules) == 1
rule = rules[0]
assert rule.use == POModule
Expand All @@ -35,22 +35,22 @@ def test_list_page_objects_from_module():


def test_list_page_objects_from_empty_module():
rules = find_page_object_overrides("tests.po_lib.an_empty_module")
rules = default_registry.get_overrides_from_module("tests.po_lib.an_empty_module")
assert len(rules) == 0


def test_list_page_objects_from_empty_pkg():
rules = find_page_object_overrides("tests.po_lib.an_empty_package")
rules = default_registry.get_overrides_from_module("tests.po_lib.an_empty_package")
assert len(rules) == 0


def test_list_page_objects_from_unknown_module():
with pytest.raises(ImportError):
find_page_object_overrides("tests.po_lib.unknown_module")
default_registry.get_overrides_from_module("tests.po_lib.unknown_module")


def test_list_page_objects_from_imported_registry():
rules = find_page_object_overrides("tests.po_lib", registry_name="secondary")
rules = secondary_registry.get_overrides_from_module("tests.po_lib")
assert len(rules) == 2
rule_for = {po.use: po for po in rules}

Expand All @@ -63,16 +63,8 @@ def test_list_page_objects_from_imported_registry():
assert pones.instead_of == PONestedModuleOverridenSecondary


def test_list_page_objects_from_non_existing_registry():
assert find_page_object_overrides("tests.po_lib", registry_name="not-exist") == []


def test_cmd():
from web_poet.__main__ import main

assert main(["tests.po_lib"]) is None


def test_registry_repr():
registry = PageObjectRegistry(name="test")
assert "name='test'" in str(registry)
2 changes: 1 addition & 1 deletion web_poet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .pages import WebPage, ItemPage, ItemWebPage, Injectable
from .page_inputs import ResponseData
from .overrides import handle_urls, find_page_object_overrides, PageObjectRegistry
from .overrides import handle_urls, PageObjectRegistry, default_registry
14 changes: 4 additions & 10 deletions web_poet/__main__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""Returns all Override Rules from the default registry."""

import argparse
from typing import Callable

import tabulate

from web_poet.overrides import find_page_object_overrides
from web_poet import default_registry


def qualified_name(cls: Callable) -> str:
Expand All @@ -20,14 +22,6 @@ def main(args=None):
type=str,
help="A package or module to list overrides from",
)
parser.add_argument(
"--registry",
"-n",
metavar="REGISTRY_NAME",
type=str,
help="Registry name to list overrides from",
default="default",
)
args = parser.parse_args(args)
table = [
(
Expand All @@ -48,7 +42,7 @@ def main(args=None):
rule.for_patterns.priority,
rule.meta,
)
for rule in find_page_object_overrides(args.module, registry_name=args.registry)
for rule in default_registry.get_overrides_from_module(args.module)
]
print(tabulate.tabulate(table, headers="firstrow"))

Expand Down
111 changes: 27 additions & 84 deletions web_poet/overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,16 @@
import importlib.util
import warnings
import pkgutil
import sys
from dataclasses import dataclass, field
from typing import Iterable, Union, List, Callable, Dict, Any

from url_matcher import Patterns

# Used by ``PageObjectRegistry`` to declare itself in a module so that it's
# easily discovered by ``find_page_object_overrides()`` later on.
REGISTRY_MODULE_ANCHOR = "_registry_module_anchor_"


@dataclass(frozen=True)
class HandleUrlsSpec:
"""Meta information used by the :py:func:`web_poet.handle_urls` decorator"""

patterns: Patterns
overrides: Callable
meta: Dict[str, Any] = field(default_factory=dict)


@dataclass(frozen=True)
class OverrideRule:
"""A single override rule. Specify when a page object should be used instead of another"""
"""A single override rule that specifies when a page object should be used
instead of another."""

for_patterns: Patterns
use: Callable
Expand Down Expand Up @@ -109,23 +96,7 @@ class ExampleComProductPage(ItemPage):
"""

def __init__(self, name: str = ""):
self.name = name
self.data: Dict[Callable, HandleUrlsSpec] = {}

def _declare_registry_in_module(self, cls):
"""This allows the Registry to be easily discovered later on by
``find_page_object_overrides()`` by explicitly declaring its presence
on the given module.
"""

module = sys.modules[cls.__module__]
if not hasattr(module, REGISTRY_MODULE_ANCHOR):
registries = {self.name: self}
else:
registries = getattr(module, REGISTRY_MODULE_ANCHOR)
registries[self.name] = self

setattr(module, REGISTRY_MODULE_ANCHOR, registries)
self.data: Dict[Callable, OverrideRule] = {}

def handle_urls(
self,
Expand Down Expand Up @@ -157,20 +128,19 @@ def handle_urls(
"""

def wrapper(cls):
self._declare_registry_in_module(cls)

spec = HandleUrlsSpec(
patterns=Patterns(
rule = OverrideRule(
for_patterns=Patterns(
include=_as_list(include),
exclude=_as_list(exclude),
priority=priority,
),
overrides=overrides,
use=cls,
instead_of=overrides,
meta=kwargs,
)
# If it was already defined, we don't want to override it
if cls not in self.data:
self.data[cls] = spec
self.data[cls] = rule
else:
warnings.warn(
f"Multiple @handle_urls annotations with the same 'overrides' "
Expand All @@ -182,22 +152,34 @@ def wrapper(cls):

return wrapper

def get_data_from_module(self, module: str) -> Dict[Callable, HandleUrlsSpec]:
"""Returns the override mappings that were declared using ``handle_urls``
# TODO: implement by calling get_overrides_from_module() in the Scrapy project
def get_overrides(self) -> List[OverrideRule]:
"""Returns all override rules that were declared using ``handle_urls``
inside the current Scrapy project."""
pass

def get_overrides_from_module(self, module: str) -> List[OverrideRule]:
"""Returns the override rules that were declared using ``handle_urls``
in a specific module.
This is useful if you've organized your Page Objects into multiple
submodules in your project.
"""
rules: Dict[Callable, OverrideRule] = {}

for mod in walk_modules(module):
# Dict ensures that no duplicates are collected and returned.
rules.update(self._filter_from_module(mod.__name__))

return list(rules.values())

def _filter_from_module(self, module: str) -> Dict[Callable, OverrideRule]:
return {
cls: spec
for cls, spec in self.data.items()
cls: rule
for cls, rule in self.data.items()
if cls.__module__.startswith(module)
}

def __repr__(self) -> str:
return f"PageObjectRegistry(name='{self.name}')"


# For ease of use, we'll create a default registry so that users can simply
# use its `handle_urls()` method directly by `from web_poet import handle_urls`
Expand Down Expand Up @@ -225,42 +207,3 @@ def onerror(err):
):
mod = importlib.import_module(info.name)
yield mod


def find_page_object_overrides(
module: str, registry_name: str = "default"
) -> List[OverrideRule]:
"""
Find all the Page Objects overrides in the given module/package and its
submodules.
The Page Objects that have been decorated with the ``handle_urls`` decorator
from the specified Registry ``name`` will be returned.
Note that this will explore the `module` and traverse its `submodules`.
:param module: The module or package to search in
:param registry_name: Only return page objects overrides in this registry
:return: Return a list of :py:class:`web_poet.overrides.OverrideRule` metadata.
"""

page_objects: Dict[Callable, HandleUrlsSpec] = {}
for mod in walk_modules(module):
handle_urls_dict = getattr(mod, REGISTRY_MODULE_ANCHOR, {})

# A module could have multiple non-default PageObjectRegistry instances
registry = handle_urls_dict.get(registry_name)
if not registry:
continue

page_objects.update(registry.get_data_from_module(mod.__name__))

return [
OverrideRule(
for_patterns=spec.patterns,
use=po,
instead_of=spec.overrides,
meta=spec.meta,
)
for po, spec in page_objects.items()
]

0 comments on commit 1efb506

Please sign in to comment.