Skip to content

Commit

Permalink
Merge pull request #86 from kuefmz/main
Browse files Browse the repository at this point in the history
changes for v0.6
  • Loading branch information
JJ-Author authored Oct 17, 2024
2 parents 0225a34 + a1c47cf commit abf14f3
Show file tree
Hide file tree
Showing 19 changed files with 1,483 additions and 682 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ jobs:
echo "$CA_CERT" > ca-cert.pem
echo "$CA_KEY" > ca-key.pem
echo "$CA_SIGNING_KEY" > ca-signing-key.pem
- name: Start the proxy
run: |
poetry run python ontologytimemachine/custom_proxy.py &
- name: Test with pytest
run: |
poetry run pytest
poetry run pytest -v
4 changes: 1 addition & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,4 @@ RUN pip install poetry==$POETRY_VERSION
RUN poetry config virtualenvs.create false
RUN poetry install --no-dev && rm pyproject.toml


CMD python3 -m proxy --ca-key-file ca-key.pem --ca-cert-file ca-cert.pem --ca-signing-key-file ca-signing-key.pem --hostname 0.0.0.0 --port $PORT --plugins ontologytimemachine.custom_proxy.OntologyTimeMachinePlugin

ENTRYPOINT ["python3", "ontologytimemachine/custom_proxy.py"]
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,15 @@ cp ca-signing-key.pem ~/ontology-time-machine/ca-signing-key.pem
### Not working:
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://ontologi.es/days#


### Install poetry virtual environment
```
poetry install
```

### Activate poetry environment
```
poetry shell
```

python3 ontologytimemachine/custom_proxy.py --ontoFormat ntriples --ontoVersion originalFailoverLiveLatest --ontoPrecedence enforcedPriority
147 changes: 86 additions & 61 deletions ontologytimemachine/custom_proxy.py
Original file line number Diff line number Diff line change
@@ -1,107 +1,132 @@
from proxy.http.proxy import HttpProxyBasePlugin
from proxy.http.parser import HttpParser, httpParserTypes
from proxy.http.parser import HttpParser
from proxy.common.utils import build_http_response
from proxy.http.methods import HttpMethods
from ontologytimemachine.utils.utils import proxy_logic, parse_arguments
from ontologytimemachine.utils.utils import check_if_archivo_ontology_requested
from ontologytimemachine.utils.mock_responses import mock_response_403
from requests.exceptions import SSLError, Timeout, ConnectionError, RequestException
from ontologytimemachine.proxy_wrapper import HttpRequestWrapper
from ontologytimemachine.utils.proxy_logic import (
get_response_from_request,
if_not_block_host,
is_archivo_ontology_request,
)
from ontologytimemachine.utils.config import Config, parse_arguments
from http.client import responses
import proxy
import sys
import logging


IP = '0.0.0.0'
PORT = '8899'
IP = "0.0.0.0"
PORT = "8899"

config = None

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


class OntologyTimeMachinePlugin(HttpProxyBasePlugin):
def __init__(self, *args, **kwargs):
logger.info("Init")
super().__init__(*args, **kwargs)
(self.ontoFormat, self.ontoVersion, self.only_ontologies,
self.https_intercept, self.inspect_redirects, self.forward_headers,
self.subject_binary_search_threshold) = parse_arguments()
self.config = config

def before_upstream_connection(self, request: HttpParser) -> HttpParser | None:
print(config)
logger.info("Before upstream connection hook")
logger.info(
f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}"
)
wrapped_request = HttpRequestWrapper(request)

def before_upstream_connection(self, request: HttpParser):
logger.info('Before upstream connection hook')
logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')
if wrapped_request.is_connect_request():
logger.info(f"HTTPS interception mode: {self.config.httpsInterception}")

if request.method == b'CONNECT':
logger.info(f'HTTPS interception mode: {self.https_intercept}')
# Only intercept if interception is enabled
if self.https_intercept in ['all', 'archivo']:
if if_not_block_host(self.config):
logger.info("HTTPS interception is on, forwardig the request")
return request
else:
logger.info("HTTPS interception is blocked")
return None


ontology_request = check_if_archivo_ontology_requested(request)
# If only ontology mode, return None in all other cases
if self.only_ontologies and not ontology_request:
logger.warning('Request denied: not an ontology request and only ontologies mode is enabled')
self.queue_response(mock_response_403)
return None

if ontology_request:
logger.debug('The request is for an ontology')
response = proxy_logic(request, self.ontoFormat, self.ontoVersion)
# # If only ontology mode, return None in all other cases
logger.info(f"Config: {self.config}")
response = get_response_from_request(wrapped_request, self.config)
if response:
self.queue_response(response)
return None
return request


def handle_client_request(self, request: HttpParser):
logger.info('Handle client request hook')
logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')

logger.debug(request.method)
if request.method == b'CONNECT':
return request
return request

ontology_request = check_if_archivo_ontology_requested(request)
if not ontology_request:
logger.info('The requested IRI is not part of DBpedia Archivo')
return request
def do_intercept(self, _request: HttpParser) -> bool:
wrapped_request = HttpRequestWrapper(_request)
if self.config.httpsInterception in ["all"]:
return True
elif self.config.httpsInterception in ["none"]:
return False
elif self.config.httpsInterception in ["archivo"]:
if is_archivo_ontology_request(wrapped_request):
return True
return False
else:
logger.info(
f"httpsInterception: {self.config.httpsInterception} option is not allowed."
)
return False

response = proxy_logic(request, self.ontoFormat, self.ontoVersion)
self.queue_response(response)
def handle_client_request(self, request: HttpParser) -> HttpParser:
logger.info("Handle client request hook")
logger.info(
f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}"
)

return None

return request

def handle_upstream_chunk(self, chunk: memoryview):
return chunk


def queue_response(self, response):
self.client.queue(
build_http_response(
response.status_code,
reason=bytes(responses[response.status_code], 'utf-8'),
response.status_code,
reason=bytes(responses[response.status_code], "utf-8"),
headers={
b'Content-Type': bytes(response.headers.get('Content-Type'), 'utf-8')
},
body=response.content
b"Content-Type": bytes(
response.headers.get("Content-Type"), "utf-8"
)
},
body=response.content,
)
)


if __name__ == '__main__':
if __name__ == "__main__":

config = parse_arguments()

sys.argv = [sys.argv[0]]

# check if https interception is enabled
if config.httpsInterception != "none":
sys.argv += [
"--ca-key-file",
"ca-key.pem",
"--ca-cert-file",
"ca-cert.pem",
"--ca-signing-key-file",
"ca-signing-key.pem",
]

sys.argv += [
'--ca-key-file', 'ca-key.pem',
'--ca-cert-file', 'ca-cert.pem',
'--ca-signing-key-file', 'ca-signing-key.pem',
]
sys.argv += [
'--hostname', IP,
'--port', PORT,
'--plugins', __name__ + '.OntologyTimeMachinePlugin'
"--hostname",
IP,
"--port",
PORT,
"--plugins",
__name__ + ".OntologyTimeMachinePlugin",
]

logger.info("Starting OntologyTimeMachineProxy server...")
proxy.main()
proxy.main()
110 changes: 110 additions & 0 deletions ontologytimemachine/proxy_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from abc import ABC, abstractmethod
from proxy.http.parser import HttpParser
import logging
from typing import Tuple, Dict, Any

# Configure logger
# NOTE: logging.basicConfig() acts on the root logger and runs when this module
# is imported. custom_proxy.py issues the same call with the same arguments;
# per the standard library, basicConfig() does nothing once the root logger
# already has handlers, so whichever call runs first wins.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class AbstractRequestWrapper(ABC):
    """Interface for objects that wrap an incoming proxy request.

    Concrete subclasses adapt a specific request type so that the rest of
    the code can query it without knowing the underlying representation.
    The wrapped object is stored unchanged in ``self.request``.
    """

    def __init__(self, request: Any) -> None:
        """Keep a reference to the wrapped *request* object."""
        self.request = request

    @abstractmethod
    def is_get_request(self) -> bool:
        """Tell whether the wrapped request is a GET request."""

    @abstractmethod
    def is_connect_request(self) -> bool:
        """Tell whether the wrapped request is a CONNECT request."""

    @abstractmethod
    def is_head_request(self) -> bool:
        """Tell whether the wrapped request is a HEAD request."""

    @abstractmethod
    def is_https_request(self) -> bool:
        """Tell whether the wrapped request is an HTTPS request."""

    @abstractmethod
    def get_request_host(self) -> str:
        """Return the host of the wrapped request as text."""

    @abstractmethod
    def get_request_path(self) -> str:
        """Return the path of the wrapped request as text."""

    @abstractmethod
    def get_request_headers(self) -> Dict[str, str]:
        """Return the request headers as a name-to-value mapping of strings."""

    @abstractmethod
    def get_request_accept_header(self) -> str:
        """Return the value of the request's Accept header."""

    @abstractmethod
    def set_request_accept_header(self, mime_type: str) -> None:
        """Set the request's Accept header to *mime_type*."""

    @abstractmethod
    def get_request_url_host_path(self) -> Tuple[str, str, str]:
        """Return the ``(url, host, path)`` triple for the wrapped request."""


class HttpRequestWrapper(AbstractRequestWrapper):
    """Adapter exposing a proxy.py ``HttpParser`` through the wrapper interface.

    Header access relies on ``request.headers`` mapping the lower-cased header
    name (bytes) to an ``(original_name, value)`` tuple of bytes. That is the
    layout the Accept-header accessors below read and write.
    """

    def __init__(self, request: HttpParser) -> None:
        super().__init__(request)

    def _header_value(self, name: bytes, default: bytes = b"") -> bytes:
        """Return the raw value of header *name* (lower-case), or *default*."""
        # Defensive: treat a missing header mapping like an empty one.
        headers = self.request.headers or {}
        entry = headers.get(name)
        return entry[1] if entry else default

    def is_get_request(self) -> bool:
        """Return True when the request method is GET."""
        return self.request.method == b"GET"

    def is_connect_request(self) -> bool:
        """Return True when the request method is CONNECT."""
        return self.request.method == b"CONNECT"

    def is_head_request(self) -> bool:
        """Return True when the request method is HEAD."""
        return self.request.method == b"HEAD"

    def is_https_request(self) -> bool:
        """Return True for CONNECT requests or a Host value starting with ``https``.

        The Host header is looked up by its lower-case key and the value is
        taken from the ``(name, value)`` tuple; the previous ``b"Host"`` lookup
        could never match and would have called ``startswith`` on a tuple.
        """
        if self.request.method == b"CONNECT":
            return True
        return self._header_value(b"host").startswith(b"https")

    def get_request_host(self) -> str:
        """Return the request host decoded as UTF-8."""
        return self.request.host.decode("utf-8")

    def get_request_path(self) -> str:
        """Return the request path decoded as UTF-8."""
        return self.request.path.decode("utf-8")

    def get_request_headers(self) -> Dict[str, str]:
        """Return the headers as ``{original_name: value}`` with both decoded."""
        header_entries = (self.request.headers or {}).values()
        return {
            name.decode("utf-8"): value.decode("utf-8")
            for name, value in header_entries
        }

    def get_request_accept_header(self) -> str:
        """Return the Accept header value decoded as UTF-8.

        Raises:
            KeyError: if the request carries no Accept header.
        """
        logger.info("Wrapper - get_request_accept_header")
        return self.request.headers[b"accept"][1].decode("utf-8")

    def set_request_accept_header(self, mime_type: str) -> None:
        """Replace the request's Accept header with *mime_type*."""
        self.request.headers[b"accept"] = (b"Accept", mime_type.encode("utf-8"))
        logger.info(f'Accept header set to: {self.request.headers[b"accept"][1]}')

    def get_request_url_host_path(self) -> Tuple[str, str, str]:
        """Return ``(url, host, path)`` for the wrapped request.

        For GET/HEAD requests that carry no parsed host, the host is taken
        from the Host header and the URL is built with the ``https`` scheme.
        Otherwise the parsed host and the parser's own URL are used.

        Raises:
            ValueError: if a GET/HEAD request has neither a parsed host nor a
                Host header (previously this surfaced as UnboundLocalError).
        """
        logger.info("Get ontology from request")
        path = self.request.path.decode("utf-8")
        if self.request.method in {b"GET", b"HEAD"} and not self.request.host:
            # Look up by lower-case key so "Host", "host" and "HOST" all match.
            host = self._header_value(b"host").decode("utf-8")
            if not host:
                raise ValueError(
                    "Cannot determine request URL: request has neither a "
                    "host nor a Host header"
                )
            url = f"https://{host}{path}"
        else:
            host = self.request.host.decode("utf-8")
            url = str(self.request._url)

        logger.info(f"Ontology: {url}")
        return url, host, path
Loading

0 comments on commit abf14f3

Please sign in to comment.