Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

changes for v0.6 #86

Merged
merged 38 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
dd000b4
first version of the dependency lock based mode
Jul 26, 2024
b65ba5b
fix proxy
Sep 3, 2024
4258b98
add integration tests again
Sep 3, 2024
bb8bbda
fixing testcases
Sep 3, 2024
705a926
remove 2 testcases
Sep 3, 2024
6f1138b
keep 2 integration testcases
Sep 3, 2024
adfe2c5
finalize the wrapper and restructure the code
Sep 7, 2024
a302bf4
implement the functionalities discussed during the last meeting
Sep 8, 2024
1bc4e89
fixed Dockerfie to support cmd parameters easily
JJ-Author Sep 9, 2024
56c4d7a
only add the parameters for the certificate parameters if https inter…
Sep 9, 2024
700022e
fix log
Sep 10, 2024
9966b5e
adding -v to run the tests
Sep 10, 2024
dbb96ca
adding -v to run the tests
Sep 10, 2024
a5f872f
fix testcases
Sep 10, 2024
3711225
start poetry from workflow
Sep 10, 2024
f6814ab
fixes
Sep 10, 2024
415fceb
fixing some comments
Sep 27, 2024
2bf7e6d
add poetry instalation to README
Oct 4, 2024
7b4c919
rename get_ontology_from_request funtion
Oct 4, 2024
2d3ceaa
transform config to dict from tuple
Oct 4, 2024
4c98e27
Create Config dataclass and some cleanups
Oct 6, 2024
b284e74
Add testcases
Oct 6, 2024
b4057d4
fix startup command in README
Oct 15, 2024
945a6fe
modify wrapper function for get_request host and path
Oct 15, 2024
5f00b27
use enum for config
Oct 15, 2024
2110f4d
use enum for config
Oct 15, 2024
af082eb
update proxy logic function def and add do_intercept hook
Oct 15, 2024
8e232fd
update proxy logic function def and add do_intercept hook
Oct 15, 2024
995033c
Merge branch 'fixing_pr_comments' of https://github.com/kuefmz/ontolo…
Oct 15, 2024
3d8435d
fix wrapper for host and path
Oct 15, 2024
5a4c00a
fix downlaod archivo
Oct 15, 2024
93fb8be
fix tests
Oct 15, 2024
28c5b08
fix do_intercept
Oct 15, 2024
18b16e0
Merge pull request #1 from kuefmz/fixing_pr_comments
kuefmz Oct 16, 2024
7d4d3a9
move depencency.ttl to tests
Oct 16, 2024
de9b6cd
remove prints
Oct 16, 2024
abeebc8
Merge branch 'main' of https://github.com/kuefmz/ontology-time-machine
Oct 16, 2024
a1c47cf
rename block function
Oct 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ jobs:
echo "$CA_CERT" > ca-cert.pem
echo "$CA_KEY" > ca-key.pem
echo "$CA_SIGNING_KEY" > ca-signing-key.pem
- name: Start the proxy
run: |
poetry run python ontologytimemachine/custom_proxy.py &
- name: Test with pytest
run: |
poetry run pytest
poetry run pytest -v
4 changes: 1 addition & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,4 @@ RUN pip install poetry==$POETRY_VERSION
RUN poetry config virtualenvs.create false
RUN poetry install --no-dev && rm pyproject.toml


CMD python3 -m proxy --ca-key-file ca-key.pem --ca-cert-file ca-cert.pem --ca-signing-key-file ca-signing-key.pem --hostname 0.0.0.0 --port $PORT --plugins ontologytimemachine.custom_proxy.OntologyTimeMachinePlugin

ENTRYPOINT ["python3", "ontologytimemachine/custom_proxy.py"]
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,15 @@ cp ca-signing-key.pem ~/ontology-time-machine/ca-signing-key.pem
### Not working:
- curl -x http://0.0.0.0:8899 -H "Accept: text/turtle" --cacert ca-cert.pem http://ontologi.es/days#


### Install poetry virtual environment
```
poetry install
```

### Activate poetry environment
```
poetry shell
```

python3 ontologytimemachine/custom_proxy.py --ontoFormat ntriples --ontoVersion originalFailoverLiveLatest --ontoPrecedence enforcedPriority
147 changes: 86 additions & 61 deletions ontologytimemachine/custom_proxy.py
JJ-Author marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,107 +1,132 @@
from proxy.http.proxy import HttpProxyBasePlugin
from proxy.http.parser import HttpParser, httpParserTypes
from proxy.http.parser import HttpParser
from proxy.common.utils import build_http_response
from proxy.http.methods import HttpMethods
from ontologytimemachine.utils.utils import proxy_logic, parse_arguments
from ontologytimemachine.utils.utils import check_if_archivo_ontology_requested
from ontologytimemachine.utils.mock_responses import mock_response_403
from requests.exceptions import SSLError, Timeout, ConnectionError, RequestException
from ontologytimemachine.proxy_wrapper import HttpRequestWrapper
from ontologytimemachine.utils.proxy_logic import (
get_response_from_request,
if_not_block_host,
is_archivo_ontology_request,
)
from ontologytimemachine.utils.config import Config, parse_arguments
from http.client import responses
import proxy
import sys
import logging


IP = '0.0.0.0'
PORT = '8899'
IP = "0.0.0.0"
PORT = "8899"

config = None

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


class OntologyTimeMachinePlugin(HttpProxyBasePlugin):
def __init__(self, *args, **kwargs):
logger.info("Init")
super().__init__(*args, **kwargs)
(self.ontoFormat, self.ontoVersion, self.only_ontologies,
self.https_intercept, self.inspect_redirects, self.forward_headers,
self.subject_binary_search_threshold) = parse_arguments()
self.config = config

def before_upstream_connection(self, request: HttpParser) -> HttpParser | None:
print(config)
logger.info("Before upstream connection hook")
logger.info(
f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}"
)
wrapped_request = HttpRequestWrapper(request)

def before_upstream_connection(self, request: HttpParser):
logger.info('Before upstream connection hook')
logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')
if wrapped_request.is_connect_request():
logger.info(f"HTTPS interception mode: {self.config.httpsInterception}")

if request.method == b'CONNECT':
logger.info(f'HTTPS interception mode: {self.https_intercept}')
# Only intercept if interception is enabled
if self.https_intercept in ['all', 'archivo']:
if if_not_block_host(self.config):
logger.info("HTTPS interception is on, forwardig the request")
return request
else:
logger.info("HTTPS interception is blocked")
return None


ontology_request = check_if_archivo_ontology_requested(request)
# If only ontology mode, return None in all other cases
if self.only_ontologies and not ontology_request:
logger.warning('Request denied: not an ontology request and only ontologies mode is enabled')
self.queue_response(mock_response_403)
return None

if ontology_request:
logger.debug('The request is for an ontology')
response = proxy_logic(request, self.ontoFormat, self.ontoVersion)
# # If only ontology mode, return None in all other cases
logger.info(f"Config: {self.config}")
response = get_response_from_request(wrapped_request, self.config)
if response:
self.queue_response(response)
return None
return request


def handle_client_request(self, request: HttpParser):
logger.info('Handle client request hook')
logger.info(f'Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}')

logger.debug(request.method)
if request.method == b'CONNECT':
return request
return request

ontology_request = check_if_archivo_ontology_requested(request)
if not ontology_request:
logger.info('The requested IRI is not part of DBpedia Archivo')
return request
def do_intercept(self, _request: HttpParser) -> bool:
wrapped_request = HttpRequestWrapper(_request)
if self.config.httpsInterception in ["all"]:
return True
elif self.config.httpsInterception in ["none"]:
return False
elif self.config.httpsInterception in ["archivo"]:
if is_archivo_ontology_request(wrapped_request):
return True
return False
else:
logger.info(
f"httpsInterception: {self.config.httpsInterception} option is not allowed."
)
return False

response = proxy_logic(request, self.ontoFormat, self.ontoVersion)
self.queue_response(response)
def handle_client_request(self, request: HttpParser) -> HttpParser:
logger.info("Handle client request hook")
logger.info(
f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}"
)

return None

return request

def handle_upstream_chunk(self, chunk: memoryview):
return chunk


def queue_response(self, response):
self.client.queue(
build_http_response(
response.status_code,
reason=bytes(responses[response.status_code], 'utf-8'),
response.status_code,
reason=bytes(responses[response.status_code], "utf-8"),
headers={
b'Content-Type': bytes(response.headers.get('Content-Type'), 'utf-8')
},
body=response.content
b"Content-Type": bytes(
response.headers.get("Content-Type"), "utf-8"
)
},
body=response.content,
)
)


if __name__ == '__main__':
if __name__ == "__main__":

config = parse_arguments()

sys.argv = [sys.argv[0]]

# check if https interception is enabled
if config.httpsInterception != "none":
sys.argv += [
"--ca-key-file",
"ca-key.pem",
"--ca-cert-file",
"ca-cert.pem",
"--ca-signing-key-file",
"ca-signing-key.pem",
]

sys.argv += [
'--ca-key-file', 'ca-key.pem',
'--ca-cert-file', 'ca-cert.pem',
'--ca-signing-key-file', 'ca-signing-key.pem',
]
sys.argv += [
'--hostname', IP,
'--port', PORT,
'--plugins', __name__ + '.OntologyTimeMachinePlugin'
"--hostname",
IP,
"--port",
PORT,
"--plugins",
__name__ + ".OntologyTimeMachinePlugin",
]

logger.info("Starting OntologyTimeMachineProxy server...")
proxy.main()
proxy.main()
110 changes: 110 additions & 0 deletions ontologytimemachine/proxy_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from abc import ABC, abstractmethod
from proxy.http.parser import HttpParser
import logging
from typing import Tuple, Dict, Any

# Configure logging for this module.
# NOTE: logging.basicConfig() acts on the root logger, so importing this
# module sets the process-wide level/format as a side effect. It is a no-op
# when the root logger already has handlers configured.
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
# Module-scoped logger used by the wrapper classes below.
logger = logging.getLogger(__name__)


class AbstractRequestWrapper(ABC):
    """Library-agnostic view of an incoming proxy request.

    Concrete subclasses adapt a specific request object (stored on
    ``self.request``) to the accessors declared here, so that the proxy
    logic does not depend on the underlying proxy library's request type.
    """

    def __init__(self, request: Any) -> None:
        """Store the wrapped request object on ``self.request``."""
        self.request = request

    @abstractmethod
    def is_get_request(self) -> bool:
        """Return True when the request method is GET."""

    @abstractmethod
    def is_connect_request(self) -> bool:
        """Return True when the request method is CONNECT."""

    @abstractmethod
    def is_head_request(self) -> bool:
        """Return True when the request method is HEAD."""

    @abstractmethod
    def is_https_request(self) -> bool:
        """Return True when the request is considered an HTTPS request."""

    @abstractmethod
    def get_request_host(self) -> str:
        """Return the request host as text."""

    @abstractmethod
    def get_request_path(self) -> str:
        """Return the request path as text."""

    @abstractmethod
    def get_request_headers(self) -> Dict[str, str]:
        """Return the request headers as a name -> value mapping of text."""

    @abstractmethod
    def get_request_accept_header(self) -> str:
        """Return the value of the request's Accept header as text."""

    @abstractmethod
    def set_request_accept_header(self, mime_type: str) -> None:
        """Replace the request's Accept header value with ``mime_type``."""

    @abstractmethod
    def get_request_url_host_path(self) -> Tuple[str, str, str]:
        """Return the ``(url, host, path)`` triple for the request."""


class HttpRequestWrapper(AbstractRequestWrapper):
    """Adapter exposing a proxy.py ``HttpParser`` request via the wrapper API.

    Header access assumes ``request.headers`` maps a lower-cased header name
    (bytes) to an ``(original_name, value)`` tuple of bytes; this is the
    layout the Accept-header accessors below read and write.
    """

    def __init__(self, request: HttpParser) -> None:
        """Wrap ``request``; it is stored on ``self.request``."""
        super().__init__(request)

    def is_get_request(self) -> bool:
        """Return True when the request method is GET."""
        return self.request.method == b"GET"

    def is_connect_request(self) -> bool:
        """Return True when the request method is CONNECT."""
        return self.request.method == b"CONNECT"

    def is_head_request(self) -> bool:
        """Return True when the request method is HEAD."""
        return self.request.method == b"HEAD"

    def is_https_request(self) -> bool:
        """Return True for CONNECT requests or an ``https://`` Host value.

        The previous lookup used the key ``b"Host"`` and called
        ``startswith`` on the result. With lower-cased keys and tuple values
        that lookup never matched (and would have raised on the tuple if it
        had). The header is now read through its lower-cased key and only its
        value is inspected. The prefix is the full ``https://`` scheme so
        that host names which merely begin with "https" are not misdetected.
        """
        if self.request.method == b"CONNECT":
            return True
        host_header = self.request.headers.get(b"host")
        return host_header is not None and host_header[1].startswith(b"https://")

    def get_request_host(self) -> str:
        """Return the host from the request line, decoded as UTF-8."""
        return self.request.host.decode("utf-8")

    def get_request_path(self) -> str:
        """Return the path from the request line, decoded as UTF-8."""
        return self.request.path.decode("utf-8")

    def get_request_headers(self) -> Dict[str, str]:
        """Return all headers as ``{original_name: value}`` decoded as UTF-8."""
        # Only the (name, value) tuples are needed; the lower-cased dict key
        # is redundant with the original name stored in the tuple.
        return {
            name.decode("utf-8"): value.decode("utf-8")
            for name, value in self.request.headers.values()
        }

    def get_request_accept_header(self) -> str:
        """Return the Accept header value decoded as UTF-8.

        Raises:
            KeyError: if the request carries no Accept header.
        """
        logger.info("Wrapper - get_request_accept_header")
        return self.request.headers[b"accept"][1].decode("utf-8")

    def set_request_accept_header(self, mime_type: str) -> None:
        """Overwrite the request's Accept header with ``mime_type``."""
        self.request.headers[b"accept"] = (b"Accept", mime_type.encode("utf-8"))
        logger.info(f'Accept header set to: {self.request.headers[b"accept"][1]}')

    def _get_host_header(self) -> str:
        """Return the Host header value, matching the name case-insensitively.

        Raises:
            ValueError: if the request has no Host header.
        """
        for name, value in self.request.headers.values():
            if name.lower() == b"host":
                return value.decode("utf-8")
        raise ValueError(
            "Request has no host in its request line and no Host header"
        )

    def get_request_url_host_path(self) -> Tuple[str, str, str]:
        """Return ``(url, host, path)`` for the wrapped request.

        For GET/HEAD requests whose request line carries no host, the host is
        taken from the Host header and the URL is built with the ``https``
        scheme. Otherwise host and path come from the request line and the
        URL is the string form of the parser's ``_url``.

        Raises:
            ValueError: if the host must come from the Host header but the
                request has none (previously an ``UnboundLocalError``).
        """
        logger.info("Get ontology from request")
        if (self.request.method in {b"GET", b"HEAD"}) and not self.request.host:
            host = self._get_host_header()
            path = self.request.path.decode("utf-8")
            url = f"https://{host}{path}"
        else:
            host = self.request.host.decode("utf-8")
            path = self.request.path.decode("utf-8")
            url = str(self.request._url)

        logger.info(f"Ontology: {url}")
        return url, host, path
Loading