Skip to content

Commit

Permalink
Add base_url to Selector and implement HttpRequest.from_form (#205)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio authored Jun 14, 2024
1 parent b38ad0e commit 73c4dfa
Show file tree
Hide file tree
Showing 4 changed files with 324 additions and 6 deletions.
3 changes: 3 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[report]
exclude_lines =
    if TYPE_CHECKING:
195 changes: 194 additions & 1 deletion tests/test_page_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import parsel
import pytest
import requests
from parsel import Selector

from web_poet import BrowserResponse, RequestUrl, ResponseUrl
from web_poet.page_inputs import (
Expand Down Expand Up @@ -211,6 +212,195 @@ def test_http_request_init_with_response_url() -> None:
assert str(req.url) == str(resp.url)


def test_http_request_from_form_get() -> None:
    """Check that a GET form is turned into a request with a query string.

    The form data passed by the caller is combined with the values found in
    the form itself, and the result is expected in the URL rather than in
    the body.
    """
    page = HttpResponse(
        "https://example.com",
        b"""
<!doctype html>
<title>a</title>
<form id="search-form" accept-charset="utf-8" action="/search" method="GET">
<input type="text" value="" name="query">
<select name="bar">
<option value="code">Barcode</option>
<option selected="selected" value="tender">Bartender</option>
</select>
<input type="submit">
<input type="hidden" name="baz" value="ooka">
</form>
""",
    )
    form = page.css("#search-form")[0].root
    result = HttpRequest.from_form(form, {"query": "foo"})

    expected_url = "https://example.com/search?bar=tender&baz=ooka&query=foo"
    assert str(result.url) == expected_url
    assert result.method == "GET"
    # A GET submission carries no payload and no extra headers.
    assert result.headers == HttpRequestHeaders()
    assert result.body == b""


def test_http_request_from_form_post() -> None:
    """Check that a POST form is turned into a URL-encoded request body.

    The URL must be the resolved form action without a query string, and the
    request must declare the ``application/x-www-form-urlencoded`` type.
    """
    page = HttpResponse(
        "https://example.com",
        b"""
<!doctype html>
<title>a</title>
<form id="search-form" accept-charset="utf-8" action="/search" method="POST">
<input type="text" value="" name="query">
<select name="bar">
<option value="code">Barcode</option>
<option selected="selected" value="tender">Bartender</option>
</select>
<input type="submit">
<input type="hidden" name="baz" value="ooka">
</form>
""",
    )
    form = page.css("#search-form")[0].root
    result = HttpRequest.from_form(form, {"query": "foo"})

    expected_headers = HttpRequestHeaders(
        {"Content-Type": "application/x-www-form-urlencoded"}
    )
    assert str(result.url) == "https://example.com/search"
    assert result.method == "POST"
    assert result.headers == expected_headers
    assert result.body == b"bar=tender&baz=ooka&query=foo"


def test_http_request_from_form_select_no_selected() -> None:
    """Check the value submitted for a ``<select>`` with no selected option.

    The expected body uses the value of the first ``<option>`` ("code").
    """
    page = HttpResponse(
        "https://example.com",
        b"""
<!doctype html>
<title>a</title>
<form id="search-form" accept-charset="utf-8" action="/search" method="POST">
<input type="text" value="" name="query">
<select name="bar">
<option value="code">Barcode</option>
<option value="tender">Bartender</option>
</select>
<input type="submit">
<input type="hidden" name="baz" value="ooka">
</form>
""",
    )
    form = page.css("#search-form")[0].root
    result = HttpRequest.from_form(form)
    assert result.body == b"query=&bar=code&baz=ooka"


def test_http_request_from_form_select_no_options() -> None:
    """Check that an empty ``<select>`` contributes nothing to the body.

    The expected body contains only the text input and the hidden input.
    """
    page = HttpResponse(
        "https://example.com",
        b"""
<!doctype html>
<title>a</title>
<form id="search-form" accept-charset="utf-8" action="/search" method="POST">
<input type="text" value="" name="query">
<select name="bar"></select>
<input type="submit">
<input type="hidden" name="baz" value="ooka">
</form>
""",
    )
    form = page.css("#search-form")[0].root
    result = HttpRequest.from_form(form)
    assert result.body == b"query=&baz=ooka"


def test_http_request_from_form_no_method() -> None:
    """Check that a form without a ``method`` attribute yields a GET request."""
    page = HttpResponse(
        "https://example.com",
        b"""
<!doctype html>
<title>a</title>
<form id="search-form" accept-charset="utf-8" action="/search">
<input type="text" value="" name="query">
<select name="bar">
<option value="code">Barcode</option>
<option value="tender">Bartender</option>
</select>
<input type="submit">
<input type="hidden" name="baz" value="ooka">
</form>
""",
    )
    form = page.css("#search-form")[0].root
    result = HttpRequest.from_form(form)
    assert result.method == "GET"


def test_http_request_from_form_bad_method() -> None:
    """Check that a form declaring ``method="PUT"`` yields a GET request."""
    page = HttpResponse(
        "https://example.com",
        b"""
<!doctype html>
<title>a</title>
<form id="search-form" accept-charset="utf-8" action="/search" method="PUT">
<input type="text" value="" name="query">
<select name="bar">
<option value="code">Barcode</option>
<option value="tender">Bartender</option>
</select>
<input type="submit">
<input type="hidden" name="baz" value="ooka">
</form>
""",
    )
    form = page.css("#search-form")[0].root
    result = HttpRequest.from_form(form)
    assert result.method == "GET"


def test_http_request_from_form_no_base_url() -> None:
    """Check that ``from_form`` raises ``ValueError`` without a base URL.

    The selector is built from text only, with no ``base_url`` argument.
    """
    document = Selector(
        text="""
<!doctype html>
<title>a</title>
<form id="search-form" accept-charset="utf-8" action="/search" method="PUT">
<input type="text" value="" name="query">
<select name="bar">
<option value="code">Barcode</option>
<option value="tender">Bartender</option>
</select>
<input type="submit">
<input type="hidden" name="baz" value="ooka">
</form>
""",
    )
    matches = document.css("#search-form")
    form_sel = matches[0]
    with pytest.raises(ValueError):
        HttpRequest.from_form(form_sel.root)


def test_http_request_from_form_no_action() -> None:
    """Check the request URL for a form that has no ``action`` attribute.

    The request URL is expected to equal the URL of the response.
    """
    url = "https://example.com"
    markup = b"""
<!doctype html>
<title>a</title>
<form id="search-form" accept-charset="utf-8" method="POST">
<input type="text" value="" name="query">
<select name="bar">
<option value="code">Barcode</option>
<option value="tender">Bartender</option>
</select>
<input type="submit">
<input type="hidden" name="baz" value="ooka">
</form>
"""
    page = HttpResponse(url, markup)
    form = page.css("#search-form")[0].root
    result = HttpRequest.from_form(form)
    assert str(result.url) == url


@pytest.mark.parametrize(
"cls",
(
Expand Down Expand Up @@ -515,13 +705,15 @@ def test_browser_html() -> None:


def test_browser_response() -> None:
    """Check selector shortcuts, URL joining and base URL of BrowserResponse."""
    url = "http://example.com"
    html = "<html><body><p>Hello, </p><p>world!</p></body></html>"
    # Build the response exactly once, from ``url``, so that the base_url
    # assertion at the end compares against the very value passed in.
    response = BrowserResponse(url=url, html=html, status=200)
    assert response.xpath("//p/text()").getall() == ["Hello, ", "world!"]
    assert response.css("p::text").getall() == ["Hello, ", "world!"]
    assert isinstance(response.selector, parsel.Selector)
    assert isinstance(response.html, BrowserHtml)
    assert str(response.urljoin("products")) == "http://example.com/products"
    # The selector's lxml root must carry the response URL as its base URL.
    assert response.selector.root.base_url == url


@pytest.mark.parametrize(
Expand Down Expand Up @@ -668,6 +860,7 @@ def test_http_or_browser_response() -> None:
assert isinstance(response.selector, parsel.Selector)
assert str(response.urljoin("products")) == "http://example.com/products"
assert response.status is None
assert response.selector.root.base_url == url

response = AnyResponse(response=BrowserResponse(url=url, html=html, status=200))
assert response.status == 200
Expand Down
6 changes: 3 additions & 3 deletions web_poet/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ def _selector_input(self) -> str:
@property
def selector(self) -> parsel.Selector:
    """Cached instance of :external:class:`parsel.selector.Selector`.

    The selector is built from ``self._selector_input()`` on first access
    and stored on the instance; later accesses return the stored object.
    When the instance has a ``url`` attribute, its string form is passed to
    the selector as ``base_url``; otherwise ``base_url`` is ``None``.
    """
    # Caching is implemented in a manual way to avoid issues with
    # non-hashable classes, where memoizemethod_noargs doesn't work.
    if self.__cached_selector is not None:
        return self.__cached_selector
    # Not every class using this property is guaranteed to define ``url``,
    # hence the hasattr() check instead of a direct attribute access.
    base_url = str(self.url) if hasattr(self, "url") else None
    # Parse the input only once, directly with the base URL.
    sel = parsel.Selector(text=self._selector_input(), base_url=base_url)
    self.__cached_selector = sel
    return sel

Expand Down
Loading

0 comments on commit 73c4dfa

Please sign in to comment.