Skip to content

Commit

Permalink
Added workaround for Page.navigate bug, added query param to base API
Browse files Browse the repository at this point in the history
  • Loading branch information
chazkii committed Jul 23, 2017
1 parent 6000bda commit 7a10b2c
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 62 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,5 @@ staticfiles/
.cache/

/docs/_build/
dist
chromewhip.egg-info
92 changes: 66 additions & 26 deletions chromewhip/chrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from chromewhip.base import SyncAdder
from chromewhip.protocol import page, runtime, target, input, inspector, browser, accessibility

TIMEOUT_S = 30
TIMEOUT_S = 25
MAX_PAYLOAD_SIZE_BYTES = 2 ** 23
MAX_PAYLOAD_SIZE_MB = MAX_PAYLOAD_SIZE_BYTES / 1024 ** 2

Expand Down Expand Up @@ -45,6 +45,7 @@ def __init__(self, title, url, ws_uri):
self._current_task: Optional[asyncio.Task] = None
self._ack_events = {}
self._ack_payloads = {}
self._input_events = {}
self._trigger_events = {}
self._event_payloads = {}
self._recv_task = None
Expand Down Expand Up @@ -91,15 +92,23 @@ async def recv_handler(self):
elif 'method' in result:
self._recv_log.debug('Received event message!')
event = helpers.json_to_event(result)
hash_ = event.hash()
self._recv_log.debug('Received event with hash "%s", storing...' % hash)
# first, check if any requests are waiting upon it
self._recv_log.debug('Received a "%s" event , storing against hash and name...' % event.js_name)
hash_ = event.hash_()
self._event_payloads[hash_] = event
self._event_payloads[event.js_name] = event

# first, check if any requests are waiting upon it
input_event = self._input_events.get(event.js_name)
if input_event:
self._recv_log.debug('input exists for event name "%s", alerting...' % event.js_name)
input_event.set()

trigger_event = self._trigger_events.get(hash_)
if trigger_event:
self._recv_log.debug('trigger exists for hash "%s", alerting...' % hash_)
trigger_event.set()
else:
# TODO: deal with invalid state
self._recv_log.info('Invalid message %s, what do i do now?' % result)

except asyncio.CancelledError:
Expand All @@ -114,19 +123,34 @@ async def validator(result: dict, types: dict):
raise KeyError('%s not in expected payload of %s' % (k, types))
if not isinstance(v, type_):
raise ValueError('%s is not expected type %s, instead is %s' % (v, type_, type(v)))
# await result
return result

async def _send(self, request, recv_validator=None, event_cls=None):
async def _send(self, request, recv_validator=None, input_event_cls=None, trigger_event_cls=None):
"""
TODO:
* clean up of stale events in payloads and asyncio event stores
:param request:
:param recv_validator:
:param input_event_cls:
:param trigger_event_cls:
:return:
"""
self._message_id += 1
request['id'] = self._message_id

ack_event = asyncio.Event()
self._ack_events[self._message_id] = ack_event

if event_cls:
if not event_cls.is_hashable:
raise ValueError('Cannot trigger of event type "%s" as not hashable' % event_cls.__name__)
if input_event_cls:
if not input_event_cls.is_hashable:
raise ValueError('Input event class "%s" as not hashable' % input_event_cls.__name__)
# we can already register the input event before sending command
input_event = asyncio.Event()
self._input_events[input_event_cls.js_name] = input_event

if trigger_event_cls:
if not trigger_event_cls.is_hashable:
raise ValueError('Trigger event type "%s" as not hashable' % trigger_event_cls.__name__)

result = {'ack': None, 'event': None}

Expand All @@ -140,8 +164,8 @@ async def _send(self, request, recv_validator=None, event_cls=None):
await asyncio.wait_for(ack_event.wait(), timeout=TIMEOUT_S) # recv
self._send_log.debug('Received ack event set for id=%s' % request['id'])

# ack_payload = self._ack_payloads[request['id']]
ack_payload = self._ack_payloads.get(request['id'])

if not ack_payload:
self._send_log.error('Notified but no payload available for id=%s!' % request['id'])
return result
Expand All @@ -159,26 +183,41 @@ async def _send(self, request, recv_validator=None, event_cls=None):
ack_result = recv_validator(ack_payload['result'])
self._send_log.debug('Successful recv validation for id=%s...' % request['id'])
ack_payload['result'] = ack_result
else:
ack_result = ack_payload['result']

result['ack'] = ack_payload

if event_cls:
# check if we've already received it
# TODO: how to i match ack payload to event cls init params
# - make a huge assumption that the ack payload are the hashable parts of event cls
hash_ = event_cls.build_hash(**ack_result)
if input_event_cls:
hash_ = input_event_cls.js_name
# use latest payload as key is not unique within a single session
event = self._event_payloads.get(hash_)
if event:
self._send_log.debug('Fetching stored event with hash "%s"...' % hash_)
result['event'] = event
else:
hash_input_dict = {}
if not event:
self._send_log.debug('Waiting for event with hash "%s"...' % hash_)
await asyncio.wait_for(input_event.wait(), timeout=TIMEOUT_S) # recv
event = self._event_payloads.get(hash_)

params = event.hash_().split(':')[-1].split(',')
for p in params:
kv = p.split('=')
hash_input_dict[kv[0]] = kv[1]
else:
hash_input_dict = ack_result

if trigger_event_cls:
try:
hash_ = trigger_event_cls.build_hash(**hash_input_dict)
except TypeError:
raise TypeError('Event "%s" hash cannot be built with "%s"' % trigger_event_cls.js_name, hash_input_dict)
event = self._event_payloads.get(hash_)
if not event:
self._send_log.debug('Waiting for event with hash "%s"...' % hash_)
trigger_event = asyncio.Event()
self._trigger_events[hash_] = trigger_event
await asyncio.wait_for(trigger_event.wait(), timeout=TIMEOUT_S) # recv
event = self._event_payloads.get(hash_)
if event:
result['event'] = event
result['event'] = event

self._send_log.info('Successfully sent command = %s' % msg)
return result
Expand Down Expand Up @@ -218,8 +257,8 @@ def ws_uri(self):
async def enable_page_events(self):
    """Send the command built by ``page.Page.enable()`` through ``_send``.

    :return: whatever ``_send`` returns for that command.
    """
    enable_command = page.Page.enable()
    return await self._send(*enable_command)

async def send_command(self, command, await_on_event_type=None):
return await self._send(*command, event_cls=await_on_event_type)
async def send_command(self, command, input_event_type=None, await_on_event_type=None):
return await self._send(*command, input_event_cls=input_event_type, trigger_event_cls=await_on_event_type)

async def html(self):
result = await self.evaluate('document.documentElement.outerHTML')
Expand All @@ -235,9 +274,10 @@ async def go(self, url):
"""
Navigate the tab to the URL
"""
# event = page.FrameNavigatedEvent
event = page.FrameStoppedLoadingEvent
return await self.send_command(page.Page.navigate(url), event)
# workaround for bug
return await self.send_command(page.Page.navigate(url),
input_event_type=page.FrameNavigatedEvent,
await_on_event_type=page.FrameStoppedLoadingEvent)

async def evaluate(self, javascript):
"""
Expand Down
7 changes: 2 additions & 5 deletions chromewhip/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,7 @@ class BaseEvent:
hashable = []
is_hashable = False

def hash(self):
"""
TODO: deal with event with no root key hashables. must match build_hash
"""
# hashable_params = {k:v for k, v in cls.__dict__.items() if k in cls.hashable}
def hash_(self):
hashable_params = {}
for k, v in self.__dict__.items():
if k in self.hashable:
Expand All @@ -72,6 +68,7 @@ def hash(self):
return h


# TODO: how do
def json_to_event(payload) -> BaseEvent:
try:
prot_name, js_event = payload['method'].split('.')
Expand Down
6 changes: 4 additions & 2 deletions chromewhip/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ async def _go(request: web.Request):

url = request.query.get('url')
if not url:
return web.HTTPBadRequest(reason='no url query param provided') # TODO: match splash
return web.HTTPBadRequest(reason='no url query param provided') # TODO: match splash reply

wait_s = float(request.query.get('wait', 0))

raw_viewport = request.query.get('viewport', '1024x768')
parts = raw_viewport.split('x')
Expand All @@ -44,7 +46,7 @@ async def _go(request: web.Request):
await tab.send_command(cmd)
await tab.enable_page_events()
await tab.go(url)

await asyncio.sleep(wait_s)
if js_profile_name:
await tab.evaluate(js_profiles[js_profile_name])

Expand Down
25 changes: 13 additions & 12 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,19 @@ Arguments:
url : string : required
The url to render (required)

.. _arg-wait:

wait : float : optional
Time (in seconds) to wait for updates after the page is loaded, i.e. after
the relevant ``Page.frameStoppedLoading`` event is received
(defaults to 0). Increase this value if you expect pages to contain
setInterval/setTimeout javascript calls, because with wait=0
callbacks of setInterval/setTimeout won't be executed. Non-zero
:ref:`wait <arg-wait>` is also required for PNG and JPEG rendering when
doing full-page rendering (see :ref:`render_all <arg-render-all>`).

Wait time must be less than :ref:`timeout <arg-timeout>`.

.. TODO: implement
.. _arg-baseurl:
Expand Down Expand Up @@ -52,18 +65,6 @@ url : string : required
``request:set_timeout(timeout)`` method; :ref:`splash-resource-timeout`
attribute.
.. _arg-wait:
wait : float : optional
Time (in seconds) to wait for updates after page is loaded
(defaults to 0). Increase this value if you expect pages to contain
setInterval/setTimeout javascript calls, because with wait=0
callbacks of setInterval/setTimeout won't be executed. Non-zero
:ref:`wait <arg-wait>` is also required for PNG and JPEG rendering when
doing full-page rendering (see :ref:`render_all <arg-render-all>`).
Wait time must be less than :ref:`timeout <arg-timeout>`.
.. _arg-proxy:
proxy : string : optional
Expand Down
72 changes: 59 additions & 13 deletions tests/test_chrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from websockets.exceptions import ConnectionClosed


from chromewhip import chrome, protocol, helpers
from chromewhip import chrome, helpers
from chromewhip.protocol import page

TEST_HOST = 'localhost'
TEST_PORT = 32322
Expand Down Expand Up @@ -108,16 +109,16 @@ async def test_server(websocket, path):


@pytest.mark.asyncio
async def test_can_successfully_trigger_on_event_prior_to_commmand_containing_event_id(event_loop, chrome_tab):
async def test_send_command_can_trigger_on_event_prior_to_commmand_containing_event_id(event_loop, chrome_tab):

msg_id = 4
frame_id = '3228.1'
url = 'http://example.com'

chrome_tab._message_id = msg_id - 1
f = protocol.page.Frame(frame_id, 'test', url, 'test', 'text/html')
p = protocol.page.Page.navigate(url)
fe = protocol.page.FrameNavigatedEvent(f)
f = page.Frame(frame_id, 'test', url, 'test', 'text/html')
p = page.Page.navigate(url)
fe = page.FrameNavigatedEvent(f)

ack = {'id': msg_id, 'result': {'frameId': frame_id}}
triggers = {
Expand All @@ -137,27 +138,27 @@ async def test_can_successfully_trigger_on_event_prior_to_commmand_containing_ev
await chrome_tab.connect()

log.info('Sending command and awaiting...')
result = await chrome_tab.send_command(p, protocol.page.FrameNavigatedEvent)
result = await chrome_tab.send_command(p, await_on_event_type=page.FrameNavigatedEvent)
assert result.get('ack') is not None
assert result.get('event') is not None
event = result.get('event')
assert isinstance(event, protocol.page.FrameNavigatedEvent)
assert isinstance(event, page.FrameNavigatedEvent)
assert event.frame.id == f.id
assert event.frame.url == f.url

server.close()
await server.wait_closed()

@pytest.mark.asyncio
async def test_can_successfully_trigger_on_event_after_commmand_containing_event_id(event_loop, chrome_tab):
async def test_send_command_can_trigger_on_event_after_commmand_containing_event_id(event_loop, chrome_tab):
msg_id = 4
frame_id = '3228.1'
url = 'http://example.com'

chrome_tab._message_id = msg_id - 1
f = protocol.page.Frame(frame_id, 'test', url, 'test', 'text/html')
p = protocol.page.Page.navigate(url)
fe = protocol.page.FrameNavigatedEvent(f)
f = page.Frame(frame_id, 'test', url, 'test', 'text/html')
p = page.Page.navigate(url)
fe = page.FrameNavigatedEvent(f)

ack = {'id': msg_id, 'result': {'frameId': frame_id}}
triggers = {
Expand All @@ -175,14 +176,59 @@ async def test_can_successfully_trigger_on_event_after_commmand_containing_event
await chrome_tab.connect()

log.info('Sending command and awaiting...')
result = await chrome_tab.send_command(p, protocol.page.FrameNavigatedEvent)
result = await chrome_tab.send_command(p, await_on_event_type=page.FrameNavigatedEvent)
assert result.get('ack') is not None
assert result.get('event') is not None
event = result.get('event')
assert isinstance(event, protocol.page.FrameNavigatedEvent)
assert isinstance(event, page.FrameNavigatedEvent)
assert event.frame.id == f.id
assert event.frame.url == f.url

server.close()
await server.wait_closed()

@pytest.mark.asyncio
async def test_send_command_can_trigger_on_event_with_input_event(event_loop, chrome_tab):
    """Check ``send_command`` when the ack's frame id differs from the events' frame id.

    Test case for the workaround to
    https://github.com/chuckus/chromewhip/issues/2

    The ack for the navigate command carries ``old_frame_id``, while the
    ``FrameNavigatedEvent`` and ``FrameStoppedLoadingEvent`` handed to the
    test server carry ``frame_id``. The command is sent with
    ``FrameNavigatedEvent`` as the input event and
    ``FrameStoppedLoadingEvent`` as the awaited event, and the returned event
    must be the ``FrameStoppedLoadingEvent`` for ``frame_id``.
    """
    msg_id = 4
    old_frame_id = '2000.1'
    frame_id = '3228.1'
    url = 'http://example.com'

    # make the next command sent by the tab use ``msg_id``
    chrome_tab._message_id = msg_id - 1
    f = page.Frame(frame_id, 'test', url, 'test', 'text/html')
    p = page.Page.navigate(url)
    fe = page.FrameNavigatedEvent(f)
    fsle = page.FrameStoppedLoadingEvent(frame_id)

    # the command ack is not related to the events that follow it
    ack = {'id': msg_id, 'result': {'frameId': old_frame_id}}
    triggers = {
        msg_id: [ack, delay_s(1), fe, fsle]
    }

    # the message the test server is told to expect: the navigate request
    # with the message id filled in
    end_msg = copy.copy(p[0])
    end_msg['id'] = msg_id
    q = queue.Queue()
    q.put(end_msg)

    test_server = init_test_server(triggers, expected=q)
    start_server = websockets.serve(test_server, TEST_HOST, TEST_PORT)
    server = await start_server
    try:
        await chrome_tab.connect()

        log.info('Sending command and awaiting...')
        result = await chrome_tab.send_command(p,
                                               input_event_type=page.FrameNavigatedEvent,
                                               await_on_event_type=page.FrameStoppedLoadingEvent)
        assert result.get('ack') is not None
        assert result.get('event') is not None
        event = result.get('event')
        assert isinstance(event, page.FrameStoppedLoadingEvent)
        assert event.frameId == f.id
    finally:
        # always release TEST_PORT, even when an assertion or timeout fails,
        # so later tests can bind to it again
        server.close()
        await server.wait_closed()
Loading

0 comments on commit 7a10b2c

Please sign in to comment.