Skip to content

Commit

Permalink
refactored wsdl to force zeep using w3af http_client, extended_urllib…
Browse files Browse the repository at this point in the history
… accepts url as str in GET and POST, and headers as dict
  • Loading branch information
Q-back committed Sep 30, 2020
1 parent fafed41 commit a81845f
Show file tree
Hide file tree
Showing 16 changed files with 464 additions and 119 deletions.
30 changes: 30 additions & 0 deletions w3af/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pytest

from w3af.core.data.dc.headers import Headers
from w3af.core.data.parsers.doc.url import URL
from w3af.core.data.url.HTTPRequest import HTTPRequest
from w3af.core.data.url.HTTPResponse import HTTPResponse


@pytest.fixture
def http_response():
    """Provide a 200 text/html HTTPResponse for http://example.com/."""
    example_url = URL('http://example.com/')
    html_headers = Headers([('content-type', 'text/html')])
    # The same URL object is passed for both URL arguments of HTTPResponse.
    response = HTTPResponse(200, '<body></body>', html_headers, example_url, example_url)
    return response


@pytest.fixture
def http_request():
    """Provide a GET HTTPRequest for http://example.com/ with a text/html header."""
    example_url = URL('http://example.com/')
    html_headers = Headers([('content-type', 'text/html')])
    request = HTTPRequest(example_url, html_headers, method='GET')
    return request
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from w3af.core.controllers.chrome.tests.helpers import ExtendedHttpRequestHandler


@pytest.mark.skip('uses internet')
class AngularBasicTest(BaseChromeCrawlerTest):
def test_angular_click(self):
self._unittest_setup(AngularButtonClickRequestHandler)
Expand Down
111 changes: 111 additions & 0 deletions w3af/core/data/parsers/doc/tests/test_wsdl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import pytest
from mock import MagicMock, patch

from w3af.core.data.dc.headers import Headers
from w3af.core.data.parsers.doc.url import URL
from w3af.core.data.parsers.doc.wsdl import ZeepTransport, WSDLParser
from w3af.core.data.url.HTTPResponse import HTTPResponse
from w3af.core.data.url.extended_urllib import ExtendedUrllib
from w3af.plugins.tests.plugin_testing_tools import NetworkPatcher


@pytest.fixture
def mocked_http_client():
    """Provide a MagicMock used in place of the transport's HTTP client."""
    http_client_double = MagicMock()
    return http_client_double


@pytest.fixture
def zeep_transport(mocked_http_client):
    """Provide a ZeepTransport whose uri_opener is replaced by the mocked client."""
    patched_transport = ZeepTransport()
    # Swap the opener created in __init__ for the mock so calls can be inspected.
    patched_transport.uri_opener = mocked_http_client
    return patched_transport


@pytest.fixture
def zeep_transport_from_class(zeep_transport):
    """
    Provide a callable that ignores its arguments and returns the
    zeep_transport fixture, so it can stand in for the ZeepTransport class.
    """
    def _fake_constructor(*args, **kwargs):
        return zeep_transport

    return _fake_constructor


@pytest.fixture
def http_response():
    """Provide a 200 HTTPResponse for https://example.com/ with empty headers."""
    status_code = 200
    body = '<div></div>'
    empty_headers = Headers()
    # Two separate URL instances are built, one per URL argument.
    first_url = URL('https://example.com/')
    second_url = URL('https://example.com/')
    return HTTPResponse(status_code, body, empty_headers, first_url, second_url)


class TestZeepTransport:
    """Unit tests for ZeepTransport, mostly run against a mocked HTTP client."""

    def test_it_implements_all_needed_methods(self):
        transport = ZeepTransport()
        expected_api = ('get', 'load', 'post', 'post_xml')
        missing = [name for name in expected_api if not hasattr(transport, name)]
        assert not missing

    def test_it_calls_http_client_on_get_method(self, zeep_transport, mocked_http_client):
        address = 'https://example.com/'
        zeep_transport.get(address, '', {})
        assert mocked_http_client.GET.called

    def test_it_calls_http_client_on_post_method(self, zeep_transport, mocked_http_client):
        address = 'https://example.com/'
        zeep_transport.post(address, 'some data', {})
        assert mocked_http_client.POST.called

    def test_it_calls_http_client_on_post_xml_method(self, zeep_transport, mocked_http_client):
        from lxml import etree  # feeding Zeep dependencies
        envelope = etree.Element('test')
        zeep_transport.post_xml('https://example.com/', envelope, {})
        assert mocked_http_client.POST.called

    def test_it_loads_the_response_content(self, zeep_transport, mocked_http_client):
        # load() is expected to hand back the body of whatever GET returns.
        fake_response = MagicMock(name='mocked_response')
        fake_response.body = 'test'
        mocked_http_client.GET = MagicMock(return_value=fake_response)

        loaded = zeep_transport.load('http://example.com/')
        assert loaded == 'test'


class TestZeepTransportIntegration:
    """
    Tests which run an unmocked ZeepTransport inside NetworkPatcher and check
    that the requested URL was recorded by the patcher's mocked server.
    """

    def test_it_can_perform_get_request(self):
        target = 'http://example.com/'
        with NetworkPatcher() as patcher:
            ZeepTransport().get(target, {}, {})
        assert target in patcher.mocked_server.urls_requested

    def test_it_can_perform_post_request(self):
        target = 'http://example.com/'
        with NetworkPatcher() as patcher:
            ZeepTransport().post(target, 'some data', {})
        assert target in patcher.mocked_server.urls_requested

    def test_it_can_load_url(self):
        target = 'http://example.com/'
        with NetworkPatcher() as patcher:
            ZeepTransport().load(target)
        assert target in patcher.mocked_server.urls_requested


class TestWSDLParserIntegration:
    """Tests checking that WSDL parsing goes through w3af's ExtendedUrllib."""

    def test_wsdl_zeep_transport_uses_extended_urllib(self):
        opener = ZeepTransport().uri_opener
        assert isinstance(opener, ExtendedUrllib)

    def test_it_uses_extended_urllib_for_performing_requests(
        self,
        mocked_http_client,
        zeep_transport_from_class,
        http_response,
    ):
        mocked_http_client.GET = MagicMock(return_value=http_response)
        # Replace the transport class seen by the wsdl module so the parser
        # ends up using the transport backed by the mocked client.
        transport_patch = patch(
            'w3af.core.data.parsers.doc.wsdl.ZeepTransport',
            zeep_transport_from_class,
        )
        with transport_patch:
            WSDLParser(http_response=http_response)
        assert mocked_http_client.GET.called
54 changes: 47 additions & 7 deletions w3af/core/data/parsers/doc/wsdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,62 @@
"""
import contextlib
import sys
import xml.parsers.expat as expat
from cStringIO import StringIO

import SOAPpy
import zeep
from requests import HTTPError
from zeep.exceptions import XMLSyntaxError

import w3af.core.controllers.output_manager as om
import w3af.core.data.kb.knowledge_base as kb
from w3af.core.controllers.exceptions import BaseFrameworkException
from w3af.core.data.kb.info import Info
from w3af.core.data.parsers.doc.baseparser import BaseParser
from w3af.core.data.parsers.doc.url import URL
from w3af.core.controllers import output_manager


class ZeepTransport(zeep.Transport):
    """
    Custom Zeep Transport which overrides its methods to use w3af's HTTP client.

    None of the overridden request methods call super(), because we want to
    force Zeep to use our client, not theirs.

    Tradeoff:
        WSDLParser has to be tightly coupled to Zeep by design, so it also
        ends up tightly coupled to ExtendedUrllib. A parser is not intended
        to perform any requests by itself, but Zeep is constructed in such a
        way that it performs a request when it is instantiated. Because
        parsers are not intended to make requests, there is also no obvious
        way to pass a uri_opener into the parser.
    """
    def __init__(self):
        super(ZeepTransport, self).__init__()
        # ExtendedUrllib is imported here rather than at module level.
        from w3af.core.data.url.extended_urllib import ExtendedUrllib
        opener = ExtendedUrllib()
        self.uri_opener = opener
        opener.setup(disable_cache=True)

    def get(self, address, params, headers):
        """Send a GET to address via uri_opener and return its result."""
        result = self.uri_opener.GET(address, params, headers=headers)
        return result

    def post(self, address, message, headers):
        """Send a POST with message as data via uri_opener and return its result."""
        result = self.uri_opener.POST(address, data=message, headers=headers)
        return result

    def post_xml(self, address, envelope, headers):
        """Serialize envelope with Zeep's etree_to_string and POST it via uri_opener."""
        from zeep.wsdl.utils import etree_to_string
        serialized = etree_to_string(envelope)
        return self.uri_opener.POST(address, data=serialized, headers=headers)

    def load(self, url):
        """GET url via uri_opener and return the body of the response."""
        return self.uri_opener.GET(url).body


class ZeepClientAdapter(zeep.Client):
    """zeep.Client which falls back to a ZeepTransport when no transport is given."""

    def __init__(self, url, transport=None, *args, **kwargs):
        # Any falsy transport is replaced by a fresh ZeepTransport.
        if not transport:
            transport = ZeepTransport()
        super(ZeepClientAdapter, self).__init__(url, transport=transport, *args, **kwargs)


class WSDLParser(BaseParser):
"""
This class parses WSDL documents.
Expand All @@ -48,7 +87,8 @@ class WSDLParser(BaseParser):
def __init__(self, http_response):
self._proxy = None
super(WSDLParser, self).__init__(http_response)
self._wsdl_client = zeep.Client(str(http_response.get_uri()))
wsdl_url = str(http_response.get_uri())
self._wsdl_client = ZeepClientAdapter(wsdl_url)
self._discovered_urls = set()

def __getstate__(self):
Expand All @@ -58,13 +98,13 @@ def __getstate__(self):

def __setstate__(self, state):
self.__dict__.update(state)
self._wsdl_client = zeep.Client(str(self._http_response.get_uri()))
self._wsdl_client = ZeepClientAdapter(str(self._http_response.get_uri()))

@staticmethod
def can_parse(http_resp):
url = http_resp.get_uri()
try:
wsdl_client = zeep.Client(str(url))
wsdl_client = ZeepClientAdapter(str(url))
except (XMLSyntaxError, HTTPError):
exception_description = (
"The result of url: {} seems not to be valid XML.".format(
Expand Down
3 changes: 2 additions & 1 deletion w3af/core/data/parsers/tests/test_document_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
"""
import unittest
import time
import os

from w3af import ROOT_PATH
Expand All @@ -33,6 +32,7 @@
from w3af.core.data.parsers.doc.url import URL
from w3af.core.data.parsers.document_parser import (document_parser_factory,
DocumentParser)
from w3af.plugins.tests.plugin_testing_tools import patch_network


def _build_http_response(body_content, content_type):
Expand All @@ -44,6 +44,7 @@ def _build_http_response(body_content, content_type):
return HTTPResponse(200, body_content, headers, url, url, charset='utf-8')


@patch_network
class TestDocumentParserFactory(unittest.TestCase):

PDF_FILE = os.path.join(ROOT_PATH, 'core', 'data', 'parsers', 'doc',
Expand Down
6 changes: 4 additions & 2 deletions w3af/core/data/parsers/tests/test_mp_document_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from w3af.core.data.dc.headers import Headers
from w3af.core.data.parsers.doc.html import HTMLParser
from w3af.core.data.parsers.tests.test_document_parser import _build_http_response
from w3af.plugins.tests.plugin_testing_tools import NetworkPatcher
from w3af.plugins.tests.plugin_testing_tools import NetworkPatcher, patch_network


@pytest.fixture
Expand Down Expand Up @@ -388,6 +388,7 @@ def test_dictproxy_pickle_8748(self):
parser = self.mpdoc.get_document_parser_for(resp)
assert isinstance(parser._parser, HTMLParser)

@patch_network
def test_get_tags_by_filter(self):
body = '<html><a href="/abc">foo</a><b>bar</b></html>'
url = URL('http://www.w3af.com/')
Expand All @@ -399,6 +400,7 @@ def test_get_tags_by_filter(self):

assert [Tag('a', {'href': '/abc'}, 'foo'), Tag('b', {}, 'bar')] == tags

@patch_network
def test_get_tags_by_filter_empty_tag(self):
body = '<html><script src="foo.js"></script></html>'
url = URL('http://www.w3af.com/')
Expand All @@ -413,7 +415,7 @@ def test_get_tags_by_filter_empty_tag(self):

def test_it_doesnt_silence_type_error_from_document_parser(self, html_response):
self.mpdoc._document_parser_class = MockedDamagedDocumentParser
with pytest.raises(TypeError):
with pytest.raises(TypeError), NetworkPatcher():
self.mpdoc.get_document_parser_for(html_response)


Expand Down
Loading

0 comments on commit a81845f

Please sign in to comment.