Skip to content

Commit

Permalink
Merge branch 'main' into feature/#255-Rename_data_science_sandbox_to_…
Browse files Browse the repository at this point in the history
…exasol-ai-lab
  • Loading branch information
ckunki committed Jan 30, 2024
2 parents c25fbc6 + 4c6fc9b commit 0f14f41
Show file tree
Hide file tree
Showing 18 changed files with 401 additions and 21 deletions.
1 change: 1 addition & 0 deletions doc/changes/changes_0.1.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Version: 0.1.0
* #76: Added display of usage instructions for AI-Lab Docker edition
* #137: Set Jupyter lab default URL to AI-Lab start page
* #75: Changed default port of Jupyter server to 49494
* #145: Add Docker Test Library to prepare Notebook tests
* #255: Renamed data science sandbox to exasol-ai-lab

## Bug Fixes
Expand Down
3 changes: 1 addition & 2 deletions doc/developer_guide/aws.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,4 @@ The command tags only the dynamically created entities with the *asset-id* but n

The command `show-aws-assets` lists all assets which were created during the execution.
* This is very useful if an error occurred.
* If the creation of a sandbox finished normally the list should contain only the AMI, images (S3 objects) and the export tasks (one for each image).

* If the creation of a sandbox finished normally the list should contain only the AMI, images (S3 objects) and the export tasks (one for each image).
3 changes: 3 additions & 0 deletions doc/developer_guide/developer_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,6 @@ Script `start-test-release-build` requires environment variable `GH_TOKEN` to co
3. [Testing](testing.md)
4. [Running tests in the CI](ci.md)
5. [Updating Packages](updating_packages.md)
6. [Notebooks](notebooks.md)


30 changes: 30 additions & 0 deletions doc/developer_guide/notebooks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Notebook Files

The DSS repository includes some Jupyter notebooks and scripts to add these notebooks to DSS images, e.g. AMI or Docker images.

Please add or update the notebook files in folder [exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook](../../exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook).

## Notebook Testing

We are running tests for the notebooks in the Docker Edition of the AI Lab. For this we are creating a Docker test setup in
[test_notebooks_in_dss_docker_image.py](test/integration/test_notebooks_in_dss_docker_image.py) which installs test libraries into the AI Lab Docker Image.
It further creates a new test and Docker Container for each notebook test in [test/notebooks](test/notebooks).
Notebook test names need to fit the pattern `nbtest_*.py` to prevent pytest from running them outside of the Docker setup.

Environment variables with the prefix `NBTEST_` with which you call
[test_notebooks_in_dss_docker_image.py](test/integration/test_notebooks_in_dss_docker_image.py) are forwarded
into the Docker container and to the notebook test. You can use this to forward secrets to the notebook tests.

By default all created containers and images are removed after running the tests regardless of success or failure.
However, with the following pytest commandline parameters you can keep them or reuse them to speed up local testing:

```
--dss-docker-image=DSS_DOCKER_IMAGE
Name and version of existing Docker image to use for tests
--keep-dss-docker-image
Keep the created dss docker image for inspection or reuse.
--docker-image-notebook-test=DOCKER_IMAGE_NOTEBOOK_TEST
Name and version of existing Docker image for Notebook testing to use for tests
--keep-docker-image-notebook-test
Keep the created notebook-test docker image for inspection or reuse.
```
36 changes: 19 additions & 17 deletions exasol/ds/sandbox/lib/dss_docker/create_image.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
import docker
import humanfriendly
import importlib_resources

from functools import reduce
from datetime import datetime
from docker.types import Mount
from exasol.ds.sandbox.lib import pretty_print
from importlib_metadata import version
from pathlib import Path
from functools import reduce
from typing import Dict, List, Optional

import docker
import humanfriendly
import importlib_resources
from docker.models.containers import Container as DockerContainer
from docker.models.images import Image as DockerImage
from importlib_metadata import version

from exasol.ds.sandbox.lib.config import ConfigObject, SLC_VERSION
from exasol.ds.sandbox.lib.logging import get_status_logger, LogType
from exasol.ds.sandbox.lib import pretty_print
from exasol.ds.sandbox.lib.ansible import ansible_repository
from exasol.ds.sandbox.lib.ansible.ansible_run_context import AnsibleRunContext
from exasol.ds.sandbox.lib.ansible.ansible_access import AnsibleAccess, AnsibleFacts
from exasol.ds.sandbox.lib.setup_ec2.run_install_dependencies import run_install_dependencies
from exasol.ds.sandbox.lib.ansible.ansible_run_context import AnsibleRunContext
from exasol.ds.sandbox.lib.config import ConfigObject, SLC_VERSION
from exasol.ds.sandbox.lib.logging import get_status_logger, LogType
from exasol.ds.sandbox.lib.setup_ec2.host_info import HostInfo
from exasol.ds.sandbox.lib.setup_ec2.run_install_dependencies import run_install_dependencies

DEFAULT_ORG_AND_REPOSITORY = "exasol/ai-lab"
# name of the project as specified in file pyproject.toml
Expand Down Expand Up @@ -152,13 +149,18 @@ def _commit_container(
_logger.info("Committing changes to docker container")
virtualenv = get_fact(facts, "jupyter", "virtualenv")
port = get_fact(facts, "jupyter", "port")
notebook_folder = get_fact(facts, "notebook_folder", "final")
notebook_folder_final = get_fact(facts, "notebook_folder", "final")
notebook_folder_initial = get_fact(facts, "notebook_folder", "initial")
conf = {
"Entrypoint": entrypoint(facts),
"Cmd": [],
"Volumes": { notebook_folder: {}, },
"ExposedPorts": { f"{port}/tcp": {} },
"Env": [ f"VIRTUAL_ENV={virtualenv}" ],
"Volumes": {notebook_folder_final: {}, },
"ExposedPorts": {f"{port}/tcp": {}},
"Env": [
f"VIRTUAL_ENV={virtualenv}",
f"NOTEBOOK_FOLDER_FINAL={notebook_folder_final}",
f"NOTEBOOK_FOLDER_INITIAL={notebook_folder_initial}"
],
}
return container.commit(
repository=self.image_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ jupysql==0.10.7
sqlalchemy_exasol==4.6.3
stopwatch.py==2.0.1
--extra-index-url https://download.pytorch.org/whl/cpu
git+https://github.com/exasol/notebook-connector.git@0.2.6
git+https://github.com/exasol/notebook-connector.git@aa1496f
16 changes: 15 additions & 1 deletion test/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ def pytest_addoption(parser):
"--dss-docker-image", default=None,
help="Name and version of existing Docker image to use for tests",
)
parser.addoption(
"--keep-dss-docker-image", action="store_true", default=False,
help="Keep the created dss docker image for inspection or reuse."
)
parser.addoption(
"--docker-image-notebook-test", default=None,
help="Name and version of existing Docker image for Notebook testing to use for tests",
)
parser.addoption(
"--keep-docker-image-notebook-test", action="store_true", default=False,
help="Keep the created notebook-test docker image for inspection or reuse.",
)
parser.addoption(
"--docker-registry", default=None, metavar="HOST:PORT",
help="Docker registry for pushing Docker images to",
Expand All @@ -23,6 +35,7 @@ def dss_docker_image(request):
--ds-docker-image-name.
"""
existing = request.config.getoption("--dss-docker-image")
keep_image = request.config.getoption(f"--keep-dss-docker-image")
if existing and ":" in existing:
name, version = existing.split(":")
yield DssDockerImage(name, version)
Expand All @@ -37,4 +50,5 @@ def dss_docker_image(request):
try:
yield testee
finally:
docker.from_env().images.remove(testee.image_name)
if not keep_image:
docker.from_env().images.remove(testee.image_name, force=True)
Empty file.
36 changes: 36 additions & 0 deletions test/integration/docker/container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import re
from typing import Union

import docker
from docker.models.containers import Container
from docker.models.images import Image


def sanitize_test_name(test_name: str) -> str:
    """
    Map a pytest test name to a string that is safe for use in a Docker
    container name: every maximal run of characters that are not ASCII
    letters or digits is collapsed into a single underscore.
    """
    # A single substitution suffices: the `+` quantifier already collapses
    # runs of disallowed characters, so no consecutive underscores can
    # appear in the result (the original second `re.sub('_+', '_')` pass
    # was redundant).
    return re.sub('[^0-9a-zA-Z]+', '_', test_name)


def container(request, base_name: str, image: Union[Image, str], start: bool = True, **kwargs) -> Container:
    """
    Create a Docker container based on the specified Docker image and yield
    it; force-remove the container on teardown.

    :param request: pytest request object; the current test's node name is
        embedded into the container name for traceability.
    :param base_name: prefix for the container name ("-" replaced by "_").
    :param image: Docker Image object or image name/id string.
    :param start: if True (default), start the container before yielding.
    :param kwargs: forwarded to ``client.containers.create``.
    """
    client = docker.from_env()
    base_container_name = base_name.replace("-", "_")
    test_name = sanitize_test_name(str(request.node.name))
    container_name = f"{base_container_name}_{test_name}"
    try:
        image_name = image.id if hasattr(image, "id") else image
        container = client.containers.create(
            image=image_name,
            name=container_name,
            detach=True,
            **kwargs
        )
        # Only register cleanup after creation succeeded; otherwise the
        # `containers.get` in the cleanup would raise NotFound and mask
        # the original creation error.
        try:
            if start:
                container.start()
            yield container
        finally:
            client.containers.get(container_name).remove(force=True)
    finally:
        # Close the client even when creation or cleanup fails.
        client.close()
66 changes: 66 additions & 0 deletions test/integration/docker/exec_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from typing import Optional, Tuple, Callable, Union, Iterator, cast, Mapping

from docker.models.containers import Container


def decode_bytes(bytes):
    """Decode a UTF-8 byte string and strip leading/trailing whitespace."""
    text = bytes.decode("utf-8")
    return text.strip()


def exec_command(
        command: str,
        container: Container,
        print_output: bool = False,
        workdir: Optional[str] = None,
        environment: Optional[Mapping[str, str]] = None,
        user: str = ''
) -> Optional[str]:
    """
    Run *command* inside *container* and return its decoded output, or
    ``None`` when the output was streamed to stdout (``print_output=True``).
    Raises a RuntimeError when the command exits with a non-zero code.
    """
    exit_code, raw_output = exec_run(
        container,
        command,
        stream=print_output,
        workdir=workdir,
        environment=environment,
        user=user,
    )
    text = handle_output(raw_output, print_output)
    handle_error_during_exec(command, exit_code, text)
    return text


def exec_run(container: Container, cmd, stream=False, environment=None, workdir=None, user='') \
        -> Tuple[Callable[[], Optional[int]], Union[bytes, Iterator[bytes]]]:
    """
    Run a command in the provided Docker container.

    Returns a pair: a function that inquires the current exit code of the
    exec instance (``None`` while still running), and the command's stdout,
    either as a byte string or — when ``stream=True`` — as an iterator of
    byte chunks.
    """
    api = container.client.api
    exec_id = api.exec_create(
        container.id, cmd, user=user, environment=environment,
        workdir=workdir,
    )['Id']
    exec_output = api.exec_start(exec_id, stream=stream)

    def current_exit_code() -> Optional[int]:
        # Inspect the exec instance each call; the code is None until
        # the command has terminated.
        return cast(Optional[int], api.exec_inspect(exec_id)['ExitCode'])

    return (
        current_exit_code,
        cast(Union[bytes, Iterator[bytes]], exec_output),
    )


def handle_output(output: Union[bytes, Iterator[bytes]], print_output: bool):
    """
    When *output* is a stream and printing was requested, print each decoded
    chunk and return ``None``; otherwise decode the byte string and return it.
    """
    streamed = print_output and isinstance(output, Iterator)
    if not streamed:
        return decode_bytes(output)
    for chunk in output:
        print(decode_bytes(chunk))
    return None


def handle_error_during_exec(command: str, exit_code: Callable[[], Optional[int]], output_string: Optional[str]):
    """
    Inquire the command's exit code via the *exit_code* callable and raise a
    RuntimeError (including *output_string*, if any) when it is non-zero.

    :param command: the command line, used only in the error message.
    :param exit_code: zero-argument callable returning the exit code
        (``None`` while the command is still running).
    :param output_string: the command's captured output, or ``None``.
    """
    # Use a distinct local name instead of shadowing the callable parameter.
    code = exit_code()
    if code != 0:
        if output_string:
            raise RuntimeError(
                f"Command {command} failed with exit_code {code} and output_string:\n {output_string}")
        # Fixed: the message previously ended with a stray trailing comma.
        raise RuntimeError(
            f"Command {command} failed with exit_code {code}.")
73 changes: 73 additions & 0 deletions test/integration/docker/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import json
import re
from datetime import datetime
from typing import List, Dict, Any, Tuple, Optional

import docker
from docker.errors import BuildError
from docker.models.images import Image


def format_build_log(build_log: List[Dict[str, Any]]):
    """
    Render a Docker build log as human-readable text, one line per entry.
    An entry contributes its "stream" value if present, otherwise its
    "error" value, otherwise an empty line.
    """
    def render(entry: Dict[str, Any]) -> str:
        for key in ("stream", "error"):
            if key in entry:
                return entry[key]
        return ""

    return "\n".join(render(entry) for entry in build_log)


class BuildErrorWithLog(BuildError):
    """
    Variant of docker's BuildError whose message embeds the full formatted
    build log, so the failure cause is visible directly in the traceback.
    """
    def __init__(self, reason, build_log: List[Dict[str, Any]]):
        # Append the rendered build log to the reason; the raw log is still
        # passed on as BuildError's second argument.
        super().__init__(f"{reason}\n\n{format_build_log(build_log)}", build_log)


def image(request, name: str, print_log=False, **kwargs) -> Image:
    """
    Build a Docker image (or reuse an existing one) and yield it; remove it
    on teardown unless told to keep it.

    The function supports a pair of pytest cli options with a suffix derived
    from parameter ``name``:
    Option `--docker-image-(suffix)` specifies the name of an existing image
    to be used instead of creating a new one.
    Option `--keep-docker-image-(suffix)` skips removing the image after
    test execution.

    :param kwargs: forwarded to ``client.api.build`` (e.g. ``fileobj``).
    """
    base_command_line = name.replace("_", "-")
    image_tag = request.config.getoption(f"--docker-image-{base_command_line}")
    keep_image = request.config.getoption(f"--keep-docker-image-{base_command_line}")
    client = docker.from_env()
    if image_tag:
        # Must `yield`, not `return`: this function is a generator (it
        # yields below), and `return value` in a generator only sets
        # StopIteration.value — consumers such as `yield from` in a pytest
        # fixture would never see the existing image.
        try:
            yield client.images.get(image_tag)
        finally:
            client.close()
        return
    timestamp = f'{datetime.now().timestamp():.0f}'
    image_name = name.replace("-", "_")
    image_tag = f"{image_name}:{timestamp}"
    try:
        log_generator = client.api.build(tag=image_tag, **kwargs)
        image_id, log, error = analyze_build_log(log_generator)
        if image_id is None:
            raise BuildErrorWithLog(error, log)
        if print_log:
            print(format_build_log(log))
        yield client.images.get(image_id)
    finally:
        if not keep_image:
            try:
                client.images.remove(image_tag, force=True)
            except docker.errors.ImageNotFound:
                # Build failed before the image was tagged; do not mask
                # the original error with the cleanup failure.
                pass
        client.close()


def analyze_build_log(log_generator) -> Tuple[Optional[str], List[Dict[str, Any]], Optional[str]]:
    """
    Consume a Docker build log (an iterable of JSON-encoded chunks) and
    return the triple ``(image_id, log, error)``:

    * on success: the id of the built image, the parsed log, and ``None``;
    * on an explicit "error" entry: ``None``, the parsed log, and the error;
    * otherwise (no success marker found): ``None``, the parsed log, and a
      string describing the last log event ("Unknown" for an empty log).
    """
    log = [json.loads(chunk) for chunk in log_generator]
    last_event = "Unknown"
    for entry in log:
        if 'error' in entry:
            return None, log, entry["error"]
        if 'stream' in entry:
            # Older builders report "Successfully built <id>", BuildKit-style
            # output reports "... sha256:<id>"; accept both.
            match = re.search(
                r'(^Successfully built |sha256:)([0-9a-f]+)$',
                entry['stream']
            )
            if match:
                return match.group(2), log, None
        last_event = entry
    # Stringify the last event so the declared Optional[str] return type
    # holds even when the last entry was a dict.
    return None, log, str(last_event)
45 changes: 45 additions & 0 deletions test/integration/docker/in_memory_build_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import io
import tarfile
import time


class InMemoryBuildContext:
    """
    In-memory tar archive usable as a Docker build context.

    Add files and directories via the ``add_*`` methods, then call
    :meth:`close` (or use the instance as a context manager); afterwards
    :attr:`fileobj` is rewound to offset 0 and ready to be read, e.g. by
    ``docker.api.build(fileobj=..., custom_context=True)``.
    """

    def __init__(self):
        super().__init__()
        # Backing buffer; the finished archive is read back from here.
        self.fileobj = io.BytesIO()
        self._tar = tarfile.open(fileobj=self.fileobj, mode="x")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Finalize the archive and rewind the buffer for reading."""
        self._tar.close()
        self.fileobj.seek(0)

    def __del__(self):
        # Safety net for contexts that were never closed explicitly;
        # TarFile.close() is idempotent, so double-closing is harmless.
        self._tar.close()

    def add_string_to_file(self, name: str, string: str):
        """Add *string* (UTF-8 encoded) as a file named *name*."""
        self.add_bytes_to_file(name, string.encode("UTF-8"))

    def add_bytes_to_file(self, name: str, bytes: bytes):
        """Add the raw *bytes* as a file named *name*."""
        file_obj = io.BytesIO(bytes)
        self.add_fileobj_to_file(bytes, file_obj, name)

    def add_fileobj_to_file(self, bytes, file_obj, name):
        """Add *file_obj* as a file named *name*; *bytes* supplies the entry size."""
        tar_info = tarfile.TarInfo(name=name)
        tar_info.mtime = time.time()
        tar_info.size = len(bytes)
        self._tar.addfile(tarinfo=tar_info, fileobj=file_obj)

    def add_host_path(self, host_path: str, path_in_tar: str, recursive: bool = True):
        """Add a file or directory from the host file system.

        Directories are added with their contents when *recursive* is True
        (the default, matching ``tarfile.TarFile.add``).
        """
        self._tar.add(host_path, path_in_tar, recursive)

    def add_directory(self, name: str):
        """Add an empty directory entry named *name*."""
        tar_info = tarfile.TarInfo(name=name)
        tar_info.type = tarfile.DIRTYPE
        self._tar.addfile(tarinfo=tar_info)
Loading

0 comments on commit 0f14f41

Please sign in to comment.