Skip to content

Commit

Permalink
Merge pull request #1 from kuefmz/fixing_pr_comments
Browse files Browse the repository at this point in the history
Fixing comments from @JJ-Author
  • Loading branch information
kuefmz authored Oct 16, 2024
2 parents f6814ab + 28c5b08 commit 18b16e0
Show file tree
Hide file tree
Showing 16 changed files with 1,000 additions and 698 deletions.
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,14 @@ cp ca-signing-key.pem ~/ontology-time-machine/ca-signing-key.pem
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://ontologi.es/days#


### Install poetry virtual environment
```
poetry install
```

### Activate poetry environment
```
poetry shell
```

python3 -m proxy --ca-key-file ca-key.pem --ca-cert-file ca-cert.pem --ca-signing-key-file ca-signing-key.pem --hostname IP --port 8899 --plugins ontologytimemachine.custom_proxy.OntologyTimeMachinePlugin --ontoFormat ntriples --ontoVersion originalFailoverLive --ontoPrecedence enforcedPriority
python3 ontologytimemachine/custom_proxy.py --ontoFormat ntriples --ontoVersion originalFailoverLiveLatest --ontoPrecedence enforcedPriority
132 changes: 74 additions & 58 deletions ontologytimemachine/custom_proxy.py
Original file line number Diff line number Diff line change
@@ -1,116 +1,132 @@
from proxy.http.proxy import HttpProxyBasePlugin
from proxy.http.parser import HttpParser
from proxy.common.utils import build_http_response
from ontologytimemachine.utils.utils import parse_arguments
from ontologytimemachine.utils.mock_responses import mock_response_403
from ontologytimemachine.proxy_wrapper import HttpRequestWrapper
from ontologytimemachine.utils.proxy_logic import proxy_logic, is_ontology_request_only_ontology
from ontologytimemachine.utils.proxy_logic import is_archivo_ontology_request
from ontologytimemachine.utils.proxy_logic import if_intercept_host
from ontologytimemachine.utils.proxy_logic import (
get_response_from_request,
if_intercept_host,
is_archivo_ontology_request,
)
from ontologytimemachine.utils.config import Config, parse_arguments
from http.client import responses
import proxy
import sys
import logging


IP = '0.0.0.0'
PORT = '8899'
IP = "0.0.0.0"
PORT = "8899"

config = ({'format': 'turtle', 'precedence': 'enforcedPriority', 'patchAcceptUpstream': False}, 'originalFailoverLiveLatest', False, 'all', False, True, None, None)
config = None

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


class OntologyTimeMachinePlugin(HttpProxyBasePlugin):
def __init__(self, *args, **kwargs):
logger.info('Init')
logger.info("Init")
super().__init__(*args, **kwargs)
(self.ontoFormat, self.ontoVersion, self.restrictedAccess,
self.httpsInterception, self.disableRemovingRedirects,
self.forward_headers, self.timestamp, self.manifest) = config
self.config = config

def before_upstream_connection(self, request: HttpParser):
logger.info('Before upstream connection hook')
logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')
def before_upstream_connection(self, request: HttpParser) -> HttpParser | None:
print(config)
logger.info("Before upstream connection hook")
logger.info(
f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}"
)
wrapped_request = HttpRequestWrapper(request)

if wrapped_request.is_connect_request():
logger.info(f'HTTPS interception mode: {self.httpsInterception}')
logger.info(f"HTTPS interception mode: {self.config.httpsInterception}")

# Only intercept if interception is enabled
# Move this to the utils
if if_intercept_host(self.httpsInterception):
logger.info('HTTPS interception is on, forwardig the request')
if if_intercept_host(self.config):
logger.info("HTTPS interception is on, forwardig the request")
return request
else:
logger.info('HTTPS interception is turned off')
logger.info("HTTPS interception is blocked")
return None

# If only ontology mode, return None in all other cases
if is_ontology_request_only_ontology(wrapped_request, self.restrictedAccess):
logger.warning('Request denied: not an ontology request and only ontologies mode is enabled')
self.queue_response(mock_response_403)
return None

if is_archivo_ontology_request(wrapped_request):
logger.debug('The request is for an ontology')
response = proxy_logic(wrapped_request, self.ontoFormat, self.ontoVersion, self.disableRemovingRedirects, self.timestamp, self.manifest)
# # If only ontology mode, return None in all other cases
logger.info(f"Config: {self.config}")
response = get_response_from_request(wrapped_request, self.config)
if response:
self.queue_response(response)
return None
return request

def handle_client_request(self, request: HttpParser):
logger.info('Handle client request hook')
logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')
return request

wrapped_request = HttpRequestWrapper(request)
if wrapped_request.is_connect_request():
return request
def do_intercept(self, _request: HttpParser) -> bool:
    """Decide whether this request should be intercepted.

    The decision is driven by ``self.config.httpsInterception``:

    * ``"all"``     -> always ``True``
    * ``"none"``    -> always ``False``
    * ``"archivo"`` -> ``True`` only when ``is_archivo_ontology_request``
      accepts the wrapped request
    * anything else -> logged as a disallowed option, then ``False``
    """
    # The wrapper is built up front, exactly as before, even though only
    # the "archivo" mode actually consults it.
    wrapped_request = HttpRequestWrapper(_request)
    mode = self.config.httpsInterception

    if mode == "all":
        return True
    if mode == "none":
        return False
    if mode == "archivo":
        return bool(is_archivo_ontology_request(wrapped_request))

    # Unknown mode: report it and fall back to not intercepting.
    logger.info(
        f"httpsInterception: {self.config.httpsInterception} option is not allowed."
    )
    return False

is_ontology_request = is_archivo_ontology_request(wrapped_request)
if not is_ontology_request:
logger.info('The requested IRI is not part of DBpedia Archivo')
return request
def handle_client_request(self, request: HttpParser) -> HttpParser:
logger.info("Handle client request hook")
logger.info(
f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}"
)

response = proxy_logic(wrapped_request, self.ontoFormat, self.ontoVersion, self.disableRemovingRedirects, self.timestamp, self.manifest)
self.queue_response(response)
return request

return None

# Pass-through hook: the chunk received as argument is handed back unchanged.
def handle_upstream_chunk(self, chunk: memoryview):
# No inspection or rewriting of the data happens here.
return chunk

def queue_response(self, response):
self.client.queue(
build_http_response(
response.status_code,
reason=bytes(responses[response.status_code], 'utf-8'),
response.status_code,
reason=bytes(responses[response.status_code], "utf-8"),
headers={
b'Content-Type': bytes(response.headers.get('Content-Type'), 'utf-8')
},
body=response.content
b"Content-Type": bytes(
response.headers.get("Content-Type"), "utf-8"
)
},
body=response.content,
)
)


if __name__ == '__main__':
if __name__ == "__main__":

config = parse_arguments()

sys.argv = [sys.argv[0]]

# Check if HTTPS interception is enabled
if config[3] != 'none':
if config.httpsInterception != "none":
sys.argv += [
'--ca-key-file', 'ca-key.pem',
'--ca-cert-file', 'ca-cert.pem',
'--ca-signing-key-file', 'ca-signing-key.pem',
"--ca-key-file",
"ca-key.pem",
"--ca-cert-file",
"ca-cert.pem",
"--ca-signing-key-file",
"ca-signing-key.pem",
]

sys.argv += [
'--hostname', IP,
'--port', PORT,
'--plugins', __name__ + '.OntologyTimeMachinePlugin'
"--hostname",
IP,
"--port",
PORT,
"--plugins",
__name__ + ".OntologyTimeMachinePlugin",
]

logger.info("Starting OntologyTimeMachineProxy server...")
proxy.main()
proxy.main()
97 changes: 53 additions & 44 deletions ontologytimemachine/proxy_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from abc import ABC, abstractmethod
from proxy.http.parser import HttpParser
import logging
from typing import Tuple, Dict, Any


logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Configure logger
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


class AbstractRequestWrapper(ABC):
def __init__(self, request):
def __init__(self, request: Any) -> None:
self.request = request

@abstractmethod
Expand All @@ -28,74 +31,80 @@ def is_https_request(self) -> bool:
pass

@abstractmethod
def get_request(self):
def get_request_host(self) -> str:
pass

@abstractmethod
def get_request_path(self) -> str:
pass

@abstractmethod
def get_request_headers(self):
def get_request_headers(self) -> Dict[str, str]:
pass

@abstractmethod
def get_request_accept_header(self):
def get_request_accept_header(self) -> str:
pass

@abstractmethod
def set_request_accept_header(self, mime_type):
def set_request_accept_header(self, mime_type: str) -> None:
pass

@abstractmethod
def get_ontology_from_request(self):
def get_request_url_host_path(self) -> Tuple[str, str, str]:
pass


class HttpRequestWrapper(AbstractRequestWrapper):
def __init__(self, request: HttpParser):
def __init__(self, request: HttpParser) -> None:
super().__init__(request)

def is_get_request(self) -> bool:
return self.request.method == b'GET'
return self.request.method == b"GET"

def is_connect_request(self):
return self.request.method == b'CONNECT'
def is_connect_request(self) -> bool:
return self.request.method == b"CONNECT"

def is_head_request(self):
return self.request.method == b'HEAD'
def is_head_request(self) -> bool:
return self.request.method == b"HEAD"

def is_https_request(self):
return self.request.method == b'CONNECT' or self.request.headers.get(b'Host', b'').startswith(b'https')
def is_https_request(self) -> bool:
return self.request.method == b"CONNECT" or self.request.headers.get(
b"Host", b""
).startswith(b"https")

# Return the `host` attribute of the wrapped request, decoded from bytes as UTF-8.
# NOTE(review): get_request_url_host_path checks for an empty `self.request.host`
# before using it; this accessor does not, so `.decode` would presumably fail
# when no host is set on the request — confirm against callers.
def get_request_host(self) -> str:
return self.request.host.decode("utf-8")

def get_request(self):
return self.request
# Return the `path` attribute of the wrapped request, decoded from bytes as UTF-8.
def get_request_path(self) -> str:
return self.request.path.decode("utf-8")

def get_request_headers(self):
headers = {}
def get_request_headers(self) -> Dict[str, str]:
headers: Dict[str, str] = {}
for k, v in self.request.headers.items():
headers[v[0].decode('utf-8')] = v[1].decode('utf-8')
headers[v[0].decode("utf-8")] = v[1].decode("utf-8")
return headers

def get_request_accept_header(self):
logger.info('Wrapper - get_request_accept_header')
return self.request.headers[b'accept'][1].decode('utf-8')
def set_request_accept_header(self, mime_type):
self.request.headers[b'accept'] = (b'Accept', mime_type.encode('utf-8'))
def get_request_accept_header(self) -> str:
    """Return the value of the request's Accept header as text.

    The header entry is looked up under the ``b"accept"`` key; the value is
    the second element of that entry and is decoded as UTF-8. A missing
    entry propagates whatever lookup error the headers mapping raises.
    """
    logger.info("Wrapper - get_request_accept_header")
    accept_entry = self.request.headers[b"accept"]
    raw_value = accept_entry[1]
    return raw_value.decode("utf-8")

def set_request_accept_header(self, mime_type: str) -> None:
    """Overwrite the request's Accept header with ``mime_type``.

    The entry is stored under the ``b"accept"`` key as a
    ``(b"Accept", <UTF-8 encoded mime_type>)`` tuple, and the resulting
    value is logged.
    """
    encoded_value = mime_type.encode("utf-8")
    self.request.headers[b"accept"] = (b"Accept", encoded_value)
    logger.info(f'Accept header set to: {self.request.headers[b"accept"][1]}')

def get_ontology_from_request(self):
logger.info('Get ontology from request')
print(f'Request protocol: {self.request.protocol}')
print(f'Request host: {self.request.host}')
print(f'Request _url: {self.request._url}')
print(f'Request path: {self.request.path}')
if (self.request.method == b'GET' or self.request.method == b'HEAD') and not self.request.host:

def get_request_url_host_path(self) -> Tuple[str, str, str]:
logger.info("Get ontology from request")
if (self.request.method in {b"GET", b"HEAD"}) and not self.request.host:
for k, v in self.request.headers.items():
if v[0].decode('utf-8') == 'Host':
host = v[1].decode('utf-8')
path = self.request.path.decode('utf-8')
ontology = 'https://' + host + path
if v[0].decode("utf-8") == "Host":
host = v[1].decode("utf-8")
path = self.request.path.decode("utf-8")
url = f"https://{host}{path}"
else:
host = self.request.host.decode('utf-8')
path = self.request.path.decode('utf-8')
ontology = str(self.request._url)
logger.info(f'Ontology: {ontology}')
return ontology, host, path
host = self.request.host.decode("utf-8")
path = self.request.path.decode("utf-8")
url = str(self.request._url)

logger.info(f"Ontology: {url}")
return url, host, path
Loading

0 comments on commit 18b16e0

Please sign in to comment.