diff --git a/.github/actions/lint/action.yml b/.github/actions/lint/action.yml index 1a49699ce..94ebba91c 100644 --- a/.github/actions/lint/action.yml +++ b/.github/actions/lint/action.yml @@ -10,7 +10,7 @@ runs: python-version: 3.9 - name: Install dependencies - run: pip install -r requirements.txt -r requirements/test_requirements.txt + run: pip install -r requirements.txt -r requirements/e2e_requirements.txt shell: bash - name: Run pre-commit @@ -18,5 +18,5 @@ runs: shell: bash - name: Lint - run: python -m pylint -j 0 -f parseable neptune tests + run: python -m pylint -j 0 -f parseable neptune tests e2e_tests shell: bash diff --git a/CHANGELOG.md b/CHANGELOG.md index a37c71bd6..417b4124b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +## [UNRELEASED] neptune-client 0.15.0 + +## Features +- Methods for creating and manipulating Model Registry objects ([#794](https://github.com/neptune-ai/neptune-client/pull/794)) + +### Changes +- Renamed the `--run` parameter to `--object` in `neptune sync` (the previous name is kept as deprecated, [#849](https://github.com/neptune-ai/neptune-client/pull/849)) +- More helpful error message on SSL validation problem ([#853](https://github.com/neptune-ai/neptune-client/pull/853)) +- Added names to daemon worker threads ([#851](https://github.com/neptune-ai/neptune-client/pull/851)) +- Stopped forwarding every attribute from Handler to Attribute ([#815](https://github.com/neptune-ai/neptune-client/pull/815)) + ## neptune-client 0.14.3 ## Features diff --git a/alpha_integration_dev/new_client.py b/alpha_integration_dev/new_client.py index ffc667eb8..caf1b6dea 100644 --- a/alpha_integration_dev/new_client.py +++ b/alpha_integration_dev/new_client.py @@ -41,7 +41,9 @@ class NewClientFeatures(ClientFeatures): def __init__(self): super().__init__() - self.exp = neptune.init(source_files="alpha_integration_dev/*.py") + self.exp = neptune.init( + source_files="alpha_integration_dev/*.py", + ) # download sources self.exp.sync() diff --git 
a/e2e_tests/base.py b/e2e_tests/base.py index fd1e76bca..df098d019 100644 --- a/e2e_tests/base.py +++ b/e2e_tests/base.py @@ -13,17 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__all__ = [ - "BaseE2ETest", -] +__all__ = ["BaseE2ETest", "AVAILABLE_CONTAINERS", "fake"] -import uuid import inspect from faker import Faker fake = Faker() +AVAILABLE_CONTAINERS = ["project", "run", "model", "model_version"] + class BaseE2ETest: def gen_key(self): diff --git a/e2e_tests/conftest.py b/e2e_tests/conftest.py index 0bf2dd6d2..1dd908a2f 100644 --- a/e2e_tests/conftest.py +++ b/e2e_tests/conftest.py @@ -13,7 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # +# pylint: disable=redefined-outer-name import os +import time from faker import Faker import boto3 @@ -21,9 +23,8 @@ from neptune.management.internal.utils import normalize_project_name from neptune.management import create_project, add_project_member -import neptune.new as neptune -from e2e_tests.utils import a_project_name, Environment +from e2e_tests.utils import initialize_container, a_project_name, Environment fake = Faker() @@ -44,6 +45,8 @@ def environment(): api_token=admin_token, ) + time.sleep(10) + add_project_member( name=created_project_identifier, username=user, @@ -64,15 +67,27 @@ def environment(): @pytest.fixture(scope="session") def container(request, environment): - if request.param == "project": - project = neptune.init_project(name=environment.project) - yield project - project.stop() - - if request.param == "run": - exp = neptune.init_run(project=environment.project) - yield exp - exp.stop() + exp = initialize_container( + container_type=request.param, project=environment.project + ) + yield exp + exp.stop() + + +@pytest.fixture(scope="session") +def containers_pair(request, environment): + container_a_type, container_b_type = request.param.split("-") + container_a = 
initialize_container( + container_type=container_a_type, project=environment.project + ) + container_b = initialize_container( + container_type=container_b_type, project=environment.project + ) + + yield container_a, container_b + + container_b.stop() + container_a.stop() @pytest.fixture(scope="session") diff --git a/e2e_tests/integrations/conftest.py b/e2e_tests/integrations/conftest.py index b5928e45c..22adf8997 100644 --- a/e2e_tests/integrations/conftest.py +++ b/e2e_tests/integrations/conftest.py @@ -13,235 +13,93 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import os - import pytest -import neptune.new as neptune -import numpy as np import torch -import torch.nn.functional as F -from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS -from sklearn.metrics import accuracy_score -from torch.optim.lr_scheduler import LambdaLR -from torch.utils.data import DataLoader, random_split -from torchvision import transforms -from torchvision.datasets import MNIST - -import pytorch_lightning as pl +from torch.utils.data import Dataset, DataLoader + +from pytorch_lightning import LightningModule, Trainer +from pytorch_lightning.loggers.neptune import NeptuneLogger from pytorch_lightning.callbacks import ModelCheckpoint -from pytorch_lightning.loggers import NeptuneLogger +import neptune.new as neptune -class LitModel(pl.LightningModule): - def train_dataloader(self) -> TRAIN_DATALOADERS: - """Not used, for pylint only""" - def test_dataloader(self) -> EVAL_DATALOADERS: - """Not used, for pylint only""" +class RandomDataset(Dataset): + def __init__(self, size, length): + self.len = length + # pylint: disable=no-member + self.data = torch.randn(length, size) - def val_dataloader(self) -> EVAL_DATALOADERS: - """Not used, for pylint only""" + def __getitem__(self, index): + return self.data[index] - def predict_dataloader(self) -> EVAL_DATALOADERS: - """Not used, for pylint only""" + def 
__len__(self): + return self.len - def __init__(self, linear, learning_rate, decay_factor, neptune_logger): + +class BoringModel(LightningModule): + # pylint: disable=abstract-method + def __init__(self): super().__init__() - self.linear = linear - self.learning_rate = learning_rate - self.decay_factor = decay_factor - self.train_img_max = 10 - self.train_img = 0 - self.layer_1 = torch.nn.Linear(28 * 28, linear) - self.layer_2 = torch.nn.Linear(linear, 20) - self.layer_3 = torch.nn.Linear(20, 10) - self.neptune_logger = neptune_logger - - def forward(self, x): - x = x.view(x.size(0), -1) - x = self.layer_1(x) - x = F.relu(x) - x = self.layer_2(x) - x = F.relu(x) - x = self.layer_3(x) - return x + self.layer = torch.nn.Linear(32, 2) + + def forward(self, *args, **kwargs): + return self.layer(*args, **kwargs) + + def training_step(self, *args, **kwargs): + batch, *_ = args + loss = self(batch).sum() + self.log("train/loss", loss) + return {"loss": loss} + + def validation_step(self, *args, **kwargs): + batch, *_ = args + loss = self(batch, **kwargs).sum() + self.log("valid/loss", loss) + + def test_step(self, *args, **kwargs): + batch, *_ = args + loss = self(batch, **kwargs).sum() + self.log("test/loss", loss) def configure_optimizers(self): - optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate) - scheduler = LambdaLR(optimizer, lambda epoch: self.decay_factor ** epoch) - return [optimizer], [scheduler] - - def training_step(self, batch, batch_idx): - x, y = batch - y_hat = self(x) - loss = F.cross_entropy(y_hat, y) - self.log("train/loss", loss, prog_bar=False) - y_true = y.cpu().detach().numpy() - y_pred = y_hat.argmax(axis=1).cpu().detach().numpy() - return {"loss": loss, "y_true": y_true, "y_pred": y_pred} - - def training_epoch_end(self, outputs): - y_true = np.array([]) - y_pred = np.array([]) - for results_dict in outputs: - y_true = np.append(y_true, results_dict["y_true"]) - y_pred = np.append(y_pred, results_dict["y_pred"]) - acc = 
accuracy_score(y_true, y_pred) - self.log("train/loader_acc", acc) - - def validation_step(self, batch, batch_idx, dataset_idx): - x, y = batch - y_hat = self(x) - loss = F.cross_entropy(y_hat, y) - self.log("val/loss", loss, prog_bar=False) - y_true = y.cpu().detach().numpy() - y_pred = y_hat.argmax(axis=1).cpu().detach().numpy() - return {"loss": loss, "y_true": y_true, "y_pred": y_pred} - - def validation_epoch_end(self, outputs): - for dl_idx in range(2): - y_true = np.array([]) - y_pred = np.array([]) - for results_dict in outputs[dl_idx]: - y_true = np.append(y_true, results_dict["y_true"]) - y_pred = np.append(y_pred, results_dict["y_pred"]) - acc = accuracy_score(y_true, y_pred) - self.log("val/loader_acc", acc) - - def test_step(self, batch, batch_idx): - x, y = batch - y_hat = self(x) - loss = F.cross_entropy(y_hat, y) - self.log("test/loss", loss, prog_bar=False) - y_true = y.cpu().detach().numpy() - y_pred = y_hat.argmax(axis=1).cpu().detach().numpy() - for j in np.where(np.not_equal(y_true, y_pred))[0]: - img = np.squeeze(x[j].cpu().detach().numpy()) - img[img < 0] = 0 - img = img / np.amax(img) - self.neptune_logger.experiment["model_code/test/misclassified_images"].log( - neptune.types.File.as_image(img), - description=f"y_pred={y_pred[j]}, y_true={y_true[j]}", - ) - return {"loss": loss, "y_true": y_true, "y_pred": y_pred} - - def test_epoch_end(self, outputs): - y_true = np.array([]) - y_pred = np.array([]) - for results_dict in outputs: - y_true = np.append(y_true, results_dict["y_true"]) - y_pred = np.append(y_pred, results_dict["y_pred"]) - acc = accuracy_score(y_true, y_pred) - self.log("test/acc", acc) - - -class MNISTDataModule(pl.LightningDataModule): - def predict_dataloader(self) -> EVAL_DATALOADERS: - # not required for e2e test - pass - - def __init__(self, batch_size, normalization_vector): - super().__init__() - self.batch_size = batch_size - self.normalization_vector = normalization_vector - self.mnist_train = None - self.mnist_val1 = 
None - self.mnist_val2 = None - self.mnist_test = None - - def prepare_data(self): - MNIST(os.getcwd(), train=True, download=True) - MNIST(os.getcwd(), train=False, download=True) - - def setup(self, stage=None): - # transforms - transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - self.normalization_vector[0], self.normalization_vector[1] - ), - ] - ) - if stage == "fit": - mnist_train = MNIST(os.getcwd(), train=True, transform=transform) - # do not use whole set, to save time spent on training - self.mnist_train, self.mnist_val1, self.mnist_val2, _ = random_split( - mnist_train, [5000, 500, 500, 54000] - ) - if stage == "test": - self.mnist_test = MNIST(os.getcwd(), train=False, transform=transform) - - def train_dataloader(self): - mnist_train = DataLoader( - self.mnist_train, batch_size=self.batch_size, num_workers=4 - ) - return mnist_train - - def val_dataloader(self): - mnist_val1 = DataLoader( - self.mnist_val1, batch_size=self.batch_size, num_workers=4 - ) - mnist_val2 = DataLoader( - self.mnist_val2, batch_size=self.batch_size, num_workers=4 - ) - return [mnist_val1, mnist_val2] - - def test_dataloader(self): - mnist_test = DataLoader( - self.mnist_test, batch_size=self.batch_size, num_workers=1 - ) - return mnist_test + return torch.optim.SGD(self.layer.parameters(), lr=0.1) @pytest.fixture(scope="session") def pytorch_run(environment): # given - PARAMS = { - "max_epochs": 3, - "save_top_k": 2, - "learning_rate": 0.005, - "decay_factor": 0.99, - "batch_size": 64, - "linear": 64, - } - # and run = neptune.init( - name="Integration pytorch-lightning", project=environment.project + name="Pytorch-Lightning integration", project=environment.project ) # and model_checkpoint = ModelCheckpoint( dirpath="my_model/checkpoints/", - filename="{epoch:02d}-{val/loss/dataloader_idx_1:.2f}", + filename="{epoch:02d}-{valid/loss:.2f}", save_weights_only=True, - save_top_k=PARAMS["save_top_k"], + save_top_k=2, save_last=True, - 
monitor="val/loss/dataloader_idx_1", + monitor="valid/loss", every_n_epochs=1, ) neptune_logger = NeptuneLogger(run=run, prefix="custom_prefix") # and (Subject) - trainer = pl.Trainer( - max_epochs=PARAMS["max_epochs"], - log_every_n_steps=10, + model = BoringModel() + trainer = Trainer( + limit_train_batches=1, + limit_val_batches=1, + max_epochs=3, logger=neptune_logger, - track_grad_norm=2, callbacks=[model_checkpoint], ) - model = LitModel( - linear=PARAMS["linear"], - learning_rate=PARAMS["learning_rate"], - decay_factor=PARAMS["decay_factor"], - neptune_logger=neptune_logger, - ) - data_module = MNISTDataModule( - normalization_vector=((0.1307,), (0.3081,)), batch_size=PARAMS["batch_size"] - ) + train_data = DataLoader(RandomDataset(32, 64), batch_size=2) + val_data = DataLoader(RandomDataset(32, 64), batch_size=2) + test_data = DataLoader(RandomDataset(32, 64), batch_size=2) # then - trainer.fit(model, datamodule=data_module) - trainer.test(model, datamodule=data_module) + trainer.fit(model, train_dataloaders=train_data, val_dataloaders=val_data) + trainer.test(model, dataloaders=test_data) run.sync() yield run diff --git a/e2e_tests/integrations/test_pytorch_lightning.py b/e2e_tests/integrations/test_pytorch_lightning.py index ef83d4928..0bb9ab7b1 100644 --- a/e2e_tests/integrations/test_pytorch_lightning.py +++ b/e2e_tests/integrations/test_pytorch_lightning.py @@ -19,7 +19,6 @@ import pytorch_lightning as pl from e2e_tests.base import BaseE2ETest -from e2e_tests.integrations.common import does_series_converge @pytest.mark.integrations @@ -36,20 +35,16 @@ def test_logging_values(self, pytorch_run): assert sorted(logged_epochs) == logged_epochs assert set(logged_epochs) == {0, 1, 2} - # does train_loss converge? 
- training_loss = list( - pytorch_run["custom_prefix/train/loss"].fetch_values()["value"] - ) - assert does_series_converge(training_loss) + assert pytorch_run.exists("custom_prefix/valid/loss") + assert len(pytorch_run["custom_prefix/valid/loss"].fetch_values()) == 3 def test_saving_models(self, pytorch_run): best_model_path = pytorch_run["custom_prefix/model/best_model_path"].fetch() assert re.match( - r".*my_model/checkpoints/epoch=.*-val/loss/dataloader_idx_1=.*\.ckpt$", + r".*my_model/checkpoints/epoch=.*-valid/loss=.*\.ckpt$", best_model_path, ) - best_model_score = pytorch_run["custom_prefix/model/best_model_score"].fetch() - assert 0 < best_model_score < 1 + assert pytorch_run["custom_prefix/model/best_model_score"].fetch() is not None # make sure that exactly `save_top_k` checkpoints # NOTE: when `max_epochs` is close to `save_top_k` there may be less than `save_top_k` saved models diff --git a/e2e_tests/management/__init__.py b/e2e_tests/management/__init__.py index e69de29bb..b5e585d90 100644 --- a/e2e_tests/management/__init__.py +++ b/e2e_tests/management/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/e2e_tests/management/test_management.py b/e2e_tests/management/test_management.py index 9d5e1676c..8d57696d5 100644 --- a/e2e_tests/management/test_management.py +++ b/e2e_tests/management/test_management.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from faker import Faker import pytest from neptune.management import ( @@ -26,15 +25,13 @@ remove_project_member, ) from neptune.management.internal.utils import normalize_project_name -from e2e_tests.base import BaseE2ETest +from e2e_tests.base import BaseE2ETest, fake from e2e_tests.utils import a_project_name, Environment -fake = Faker() - @pytest.mark.management class TestManagement(BaseE2ETest): - def test_standard_scenario(self, environment: "Environment"): + def test_standard_scenario(self, environment: Environment): project_name, project_key = a_project_name(project_slug=f"{fake.slug()}-mgmt") project_identifier = normalize_project_name( name=project_name, workspace=environment.workspace diff --git a/e2e_tests/standard/test_artifacts.py b/e2e_tests/standard/test_artifacts.py index 6024919a1..69721bd56 100644 --- a/e2e_tests/standard/test_artifacts.py +++ b/e2e_tests/standard/test_artifacts.py @@ -14,26 +14,21 @@ # limitations under the License. 
# import os -import uuid -import tempfile import time +import tempfile from pathlib import Path import pytest -from faker import Faker -from neptune.new.attribute_container import AttributeContainer +from neptune.new.metadata_containers import MetadataContainer -from e2e_tests.base import BaseE2ETest +from e2e_tests.base import BaseE2ETest, AVAILABLE_CONTAINERS, fake from e2e_tests.utils import tmp_context, with_check_if_file_appears -fake = Faker() - - class TestArtifacts(BaseE2ETest): - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_local_creation(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_local_creation(self, container: MetadataContainer): first, second = self.gen_key(), self.gen_key() filename = fake.unique.file_name() @@ -51,8 +46,8 @@ def test_local_creation(self, container: AttributeContainer): container[first].fetch_files_list() == container[second].fetch_files_list() ) - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_assignment(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_assignment(self, container: MetadataContainer): first, second = self.gen_key(), self.gen_key() filename = fake.unique.file_name() @@ -70,8 +65,8 @@ def test_assignment(self, container: AttributeContainer): container[first].fetch_files_list() == container[second].fetch_files_list() ) - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_local_download(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_local_download(self, container: MetadataContainer): first, second = self.gen_key(), self.gen_key() filename, filepath = fake.unique.file_name(), fake.unique.file_path( depth=3 @@ -100,8 +95,8 @@ def test_local_download(self, container: 
AttributeContainer): container[second].download() @pytest.mark.s3 - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_s3_creation(self, container: AttributeContainer, bucket, environment): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_s3_creation(self, container: MetadataContainer, bucket, environment): first, second, prefix = ( self.gen_key(), self.gen_key(), @@ -130,8 +125,8 @@ def test_s3_creation(self, container: AttributeContainer, bucket, environment): ) @pytest.mark.s3 - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_s3_download(self, container: AttributeContainer, bucket, environment): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_s3_download(self, container: MetadataContainer, bucket, environment): first = self.gen_key() prefix = f"{environment.project}/{self.gen_key()}/{type(container).__name__}" filename, filepath = fake.unique.file_name(), fake.unique.file_path( @@ -168,8 +163,8 @@ def test_s3_download(self, container: AttributeContainer, bucket, environment): container[first].download() @pytest.mark.s3 - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_s3_existing(self, container: AttributeContainer, bucket, environment): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_s3_existing(self, container: MetadataContainer, bucket, environment): first, second, prefix = ( self.gen_key(), self.gen_key(), @@ -214,8 +209,8 @@ def test_s3_existing(self, container: AttributeContainer, bucket, environment): container[first].fetch_files_list() == container[second].fetch_files_list() ) - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_local_existing(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_local_existing(self, container: 
MetadataContainer): first, second = self.gen_key(), self.gen_key() filename, filepath = fake.file_name(), fake.file_path(depth=3).lstrip("/") @@ -246,8 +241,8 @@ def test_local_existing(self, container: AttributeContainer): container[first].fetch_files_list() == container[second].fetch_files_list() ) - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_hash_cache(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_hash_cache(self, container: MetadataContainer): key = self.gen_key() filename = fake.file_name() diff --git a/e2e_tests/standard/test_base.py b/e2e_tests/standard/test_base.py index af8ff36b5..c3f1043eb 100644 --- a/e2e_tests/standard/test_base.py +++ b/e2e_tests/standard/test_base.py @@ -19,30 +19,27 @@ from datetime import datetime, timezone import pytest -from faker import Faker import neptune.new as neptune -from neptune.new.attribute_container import AttributeContainer +from neptune.new.metadata_containers import Model, MetadataContainer -from e2e_tests.base import BaseE2ETest - -fake = Faker() +from e2e_tests.base import BaseE2ETest, AVAILABLE_CONTAINERS, fake class TestAtoms(BaseE2ETest): - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) @pytest.mark.parametrize( "value", [random.randint(0, 100), random.random(), fake.boolean(), fake.word()] ) - def test_simple_assign_and_fetch(self, container: AttributeContainer, value): + def test_simple_assign_and_fetch(self, container: MetadataContainer, value): key = self.gen_key() container[key] = value container.sync() assert container[key].fetch() == value - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_simple_assign_datetime(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def 
test_simple_assign_datetime(self, container: MetadataContainer): key = self.gen_key() now = datetime.now() @@ -55,14 +52,14 @@ def test_simple_assign_datetime(self, container: AttributeContainer): ) assert container[key].fetch() == expected_now - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_fetch_non_existing_key(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_fetch_non_existing_key(self, container: MetadataContainer): key = self.gen_key() with pytest.raises(AttributeError): container[key].fetch() - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_delete_atom(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_delete_atom(self, container: MetadataContainer): key = self.gen_key() value = fake.name() @@ -77,8 +74,8 @@ def test_delete_atom(self, container: AttributeContainer): class TestNamespace(BaseE2ETest): - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_reassigning(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_reassigning(self, container: MetadataContainer): namespace = self.gen_key() key = f"{fake.unique.word()}/{fake.unique.word()}" value = fake.name() @@ -103,8 +100,8 @@ def test_reassigning(self, container: AttributeContainer): assert container[f"{namespace}/{key}"].fetch() == value - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_distinct_types(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_distinct_types(self, container: MetadataContainer): namespace = self.gen_key() key = f"{fake.unique.word()}/{fake.unique.word()}" value = random.randint(0, 100) @@ -120,8 +117,8 @@ def test_distinct_types(self, container: 
AttributeContainer): container[namespace] = {f"{key}": new_value} container.sync() - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_delete_namespace(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_delete_namespace(self, container: MetadataContainer): namespace = fake.unique.word() key1 = fake.unique.word() key2 = fake.unique.word() @@ -145,8 +142,8 @@ def test_delete_namespace(self, container: AttributeContainer): class TestStringSet(BaseE2ETest): neptune_tags_path = "sys/tags" - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_do_not_accept_non_tag_path(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_do_not_accept_non_tag_path(self, container: MetadataContainer): random_path = "some/path" container[random_path].add(fake.unique.word()) container.sync() @@ -155,8 +152,8 @@ def test_do_not_accept_non_tag_path(self, container: AttributeContainer): # backends accepts `'sys/tags'` only container[random_path].fetch() - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_add_and_remove_tags(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_add_and_remove_tags(self, container: MetadataContainer): remaining_tag1 = fake.unique.word() remaining_tag2 = fake.unique.word() to_remove_tag1 = fake.unique.word() @@ -179,26 +176,54 @@ def test_add_and_remove_tags(self, container: AttributeContainer): } -class TestFetchRunsTable(BaseE2ETest): - def test_fetch_table(self, environment): +class TestFetchTable(BaseE2ETest): + def test_fetch_runs_table(self, environment): tag = str(uuid.uuid4()) - with neptune.init(project=environment.project) as run: + + with neptune.init_run(project=environment.project) as run: run["sys/tags"].add(tag) run["value"] = 12 + 
run.sync() - with neptune.init(project=environment.project) as run: + with neptune.init_run(project=environment.project) as run: run["sys/tags"].add(tag) run["another/value"] = "testing" + run.sync() # wait for the elasticsearch cache to fill time.sleep(5) - project = neptune.init_project(name=environment.project) + project = neptune.get_project(name=environment.project) runs_table = sorted( - project.fetch_runs_table(tag=tag).to_runs(), + project.fetch_runs_table(tag=tag).to_rows(), key=lambda r: r.get_attribute_value("sys/id"), ) assert len(runs_table) == 2 assert runs_table[0].get_attribute_value("value") == 12 assert runs_table[1].get_attribute_value("another/value") == "testing" + + @pytest.mark.parametrize("container", ["model"], indirect=True) + def test_fetch_model_versions_table(self, container: Model, environment): + model_sys_id = container["sys/id"].fetch() + versions_to_initialize = 5 + + for _ in range(versions_to_initialize): + with neptune.init_model_version( + model=model_sys_id, project=environment.project + ): + pass + + # wait for the elasticsearch cache to fill + time.sleep(5) + + versions_table = sorted( + container.fetch_model_versions_table().to_rows(), + key=lambda r: r.get_attribute_value("sys/id"), + ) + assert len(versions_table) == versions_to_initialize + for index in range(versions_to_initialize): + assert ( + versions_table[index].get_attribute_value("sys/id") + == f"{model_sys_id}-{index + 1}" + ) diff --git a/e2e_tests/standard/test_copy.py b/e2e_tests/standard/test_copy.py index 85af2523d..c5d8017c2 100644 --- a/e2e_tests/standard/test_copy.py +++ b/e2e_tests/standard/test_copy.py @@ -14,54 +14,37 @@ # limitations under the License. 
# import random +import itertools + import pytest -from faker import Faker -import neptune.new as neptune -from neptune.new.run import Run -from neptune.new.project import Project +from neptune.new.metadata_containers import MetadataContainer -from e2e_tests.base import BaseE2ETest +from e2e_tests.base import BaseE2ETest, AVAILABLE_CONTAINERS, fake -fake = Faker() +# List of every possible container type pair, for instance: "run-run", "run-model", "model-model_version", ... +ALL_CONTAINERS_PAIRS = list( + map("-".join, itertools.product(AVAILABLE_CONTAINERS, AVAILABLE_CONTAINERS)) +) class TestCopying(BaseE2ETest): - @pytest.mark.parametrize("container", ["run", "project"], indirect=True) + @pytest.mark.parametrize("containers_pair", ALL_CONTAINERS_PAIRS, indirect=True) @pytest.mark.parametrize( "value", [random.randint(0, 100), random.random(), fake.boolean(), fake.word()] ) - def test_copy_project_to_container(self, container: Run, value, environment): - project = neptune.init_project(name=environment.project) - - src, destination, destination2 = self.gen_key(), self.gen_key(), self.gen_key() - - project[src] = value - project.sync() - - container[destination] = project[src] - container[destination2] = container[destination] - container.sync() + def test_copy(self, containers_pair: (MetadataContainer, MetadataContainer), value): + container_a, container_b = containers_pair - assert project[src].fetch() == value - assert container[destination].fetch() == value - assert container[destination2].fetch() == value - - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - @pytest.mark.parametrize( - "value", [random.randint(0, 100), random.random(), fake.boolean(), fake.word()] - ) - def test_copy_run_to_container(self, container: Project, value, environment): - run = neptune.init_run(project=environment.project) src, destination, destination2 = self.gen_key(), self.gen_key(), self.gen_key() - container[src] = value - container.sync() + container_a[src] 
= value + container_a.sync() - run[destination] = container[src] - run[destination2] = run[destination] - run.sync() + container_b[destination] = container_a[src] + container_b[destination2] = container_b[destination] + container_b.sync() - assert container[src].fetch() == value - assert run[destination].fetch() == value - assert run[destination2].fetch() == value + assert container_a[src].fetch() == value + assert container_b[destination].fetch() == value + assert container_b[destination2].fetch() == value diff --git a/e2e_tests/standard/test_files.py b/e2e_tests/standard/test_files.py index 1945d325e..4eeba2c2e 100644 --- a/e2e_tests/standard/test_files.py +++ b/e2e_tests/standard/test_files.py @@ -1,25 +1,38 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import os import random import uuid from itertools import product -from pathlib import Path from typing import Set from zipfile import ZipFile import pytest -from e2e_tests.base import BaseE2ETest -from e2e_tests.standard.test_base import fake +from e2e_tests.base import BaseE2ETest, AVAILABLE_CONTAINERS, fake from e2e_tests.utils import tmp_context -from neptune.new.attribute_container import AttributeContainer +from neptune.new.metadata_containers import MetadataContainer from neptune.new.internal.backends.api_model import MultipartConfig, OptionalFeatures from neptune.new.internal.backends.hosted_neptune_backend import HostedNeptuneBackend from neptune.new.types import FileSet class TestUpload(BaseE2ETest): - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_using_new_api(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_using_new_api(self, container: MetadataContainer): # pylint: disable=protected-access assert isinstance(container._backend, HostedNeptuneBackend) assert container._backend._client_config.has_feature( @@ -29,7 +42,7 @@ def test_using_new_api(self, container: AttributeContainer): container._backend._client_config.multipart_config, MultipartConfig ) - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) @pytest.mark.parametrize( "file_size", [ @@ -37,7 +50,7 @@ def test_using_new_api(self, container: AttributeContainer): pytest.param(100 * 2 ** 10, id="small"), # 100 kB, single upload ], ) - def test_single_file(self, container: AttributeContainer, file_size: int): + def test_single_file(self, container: MetadataContainer, file_size: int): key = self.gen_key() filename = fake.file_name() downloaded_filename = fake.file_name() @@ -57,8 +70,8 @@ def test_single_file(self, container: AttributeContainer, file_size: int): assert len(content) == 
file_size assert content == b"\0" * file_size - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_fileset(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_fileset(self, container: MetadataContainer): key = self.gen_key() large_filesize = 10 * 2 ** 20 # 10MB large_filename = fake.file_name() @@ -142,7 +155,7 @@ def _gen_tree_paths(cls, depth, width=3) -> Set: return subpaths @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_fileset_nested_structure(self, container: AttributeContainer): + def test_fileset_nested_structure(self, container: MetadataContainer): key = self.gen_key() possible_paths = self._gen_tree_paths(depth=3) @@ -187,7 +200,7 @@ def test_fileset_nested_structure(self, container: AttributeContainer): assert content == expected_content @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_reset_fileset(self, container): + def test_reset_fileset(self, container: MetadataContainer): key = self.gen_key() filename1 = fake.file_name() filename2 = fake.file_name() @@ -220,7 +233,7 @@ def test_reset_fileset(self, container): @pytest.mark.parametrize("container", ["project", "run"], indirect=True) @pytest.mark.parametrize("delete_attribute", [True, False]) def test_single_file_override( - self, container: AttributeContainer, delete_attribute: bool + self, container: MetadataContainer, delete_attribute: bool ): key = self.gen_key() filename1 = fake.file_name() @@ -260,7 +273,7 @@ def test_single_file_override( @pytest.mark.parametrize("container", ["project", "run"], indirect=True) @pytest.mark.parametrize("delete_attribute", [True, False]) def test_fileset_file_override( - self, container: AttributeContainer, delete_attribute: bool + self, container: MetadataContainer, delete_attribute: bool ): key = self.gen_key() filename = fake.file_name() diff --git a/e2e_tests/standard/test_init.py 
b/e2e_tests/standard/test_init.py index 6c6dd7de3..778f12d6b 100644 --- a/e2e_tests/standard/test_init.py +++ b/e2e_tests/standard/test_init.py @@ -13,36 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from faker import Faker +import pytest import neptune.new as neptune from neptune.new.project import Project +from neptune.new.metadata_containers import Model +from neptune.new.exceptions import NeptuneModelKeyAlreadyExistsError -from e2e_tests.base import BaseE2ETest -from e2e_tests.utils import with_check_if_file_appears - -fake = Faker() +from e2e_tests.base import BaseE2ETest, fake, AVAILABLE_CONTAINERS +from e2e_tests.utils import ( + with_check_if_file_appears, + initialize_container, + reinitialize_container, +) class TestInitRun(BaseE2ETest): - # TODO: test all remaining init parameters - def test_resuming_run(self, environment): - exp = neptune.init(project=environment.project) - - key = self.gen_key() - val = fake.word() - exp[key] = val - exp.sync() - - exp.stop() - - # pylint: disable=protected-access - exp2 = neptune.init(run=exp._short_id, project=environment.project) - assert exp2[key].fetch() == val - def test_custom_run_id(self, environment): custom_run_id = "-".join((fake.word() for _ in range(3))) - run = neptune.init(custom_run_id=custom_run_id, project=environment.project) + run = neptune.init_run(custom_run_id=custom_run_id, project=environment.project) key = self.gen_key() val = fake.word() @@ -51,11 +40,13 @@ def test_custom_run_id(self, environment): run.stop() - exp2 = neptune.init(custom_run_id=custom_run_id, project=environment.project) + exp2 = neptune.init_run( + custom_run_id=custom_run_id, project=environment.project + ) assert exp2[key].fetch() == val def test_send_source_code(self, environment): - exp = neptune.init( + exp = neptune.init_run( source_files="**/*.py", name="E2e init source code", project=environment.project, @@ -107,3 +98,33 @@ def 
test_init_and_readonly(self, environment): "visibility", } assert read_only_project[key].fetch() == val + + +class TestInitModel(BaseE2ETest): + @pytest.mark.parametrize("container", ["model"], indirect=True) + def test_fail_reused_model_key(self, container: Model, environment): + with pytest.raises(NeptuneModelKeyAlreadyExistsError): + model_key = container["sys/id"].fetch().split("-")[1] + neptune.init_model(key=model_key, project=environment.project) + + +class TestReinitialization(BaseE2ETest): + @pytest.mark.parametrize("container_type", AVAILABLE_CONTAINERS) + def test_resuming_container(self, container_type, environment): + container = initialize_container( + container_type=container_type, project=environment.project + ) + sys_id = container["sys/id"].fetch() + + key = self.gen_key() + val = fake.word() + container[key] = val + container.sync() + container.stop() + + reinitialized = reinitialize_container( + sys_id=sys_id, + container_type=container.container_type.value, + project=environment.project, + ) + assert reinitialized[key].fetch() == val diff --git a/e2e_tests/standard/test_multiple.py b/e2e_tests/standard/test_multiple.py new file mode 100644 index 000000000..d5a12fce2 --- /dev/null +++ b/e2e_tests/standard/test_multiple.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2021, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import random +import concurrent.futures + +import pytest +import neptune.new as neptune + +from e2e_tests.base import BaseE2ETest, fake +from e2e_tests.utils import reinitialize_container + + +def store_in_container( + sys_id: str, project: str, container_type: str, destination: str +): + container = reinitialize_container( + sys_id=sys_id, container_type=container_type, project=project + ) + container[destination] = fake.color() + container.sync() + + +class TestMultiple(BaseE2ETest): + @pytest.mark.parametrize( + "container", ["run", "model", "model_version"], indirect=True + ) + def test_single_thread( + self, container: neptune.metadata_containers.MetadataContainer, environment + ): + sys_id = container["sys/id"].fetch() + number_of_reinitialized = 5 + namespace = self.gen_key() + + reinitialized = [ + reinitialize_container( + sys_id=sys_id, + container_type=container.container_type.value, + project=environment.project, + ) + for _ in range(number_of_reinitialized) + ] + + container[f"{namespace}/{fake.unique.word()}"] = fake.color() + container.sync() + + random.shuffle(reinitialized) + for reinitialized_container in reinitialized: + reinitialized_container[f"{namespace}/{fake.unique.word()}"] = fake.color() + + random.shuffle(reinitialized) + for reinitialized_container in reinitialized: + reinitialized_container.sync() + + container.sync() + + assert len(container[namespace].fetch()) == number_of_reinitialized + 1 + + for r in reinitialized: + r.stop() + + @pytest.mark.skip(reason="no way of currently testing this") + @pytest.mark.parametrize( + "container", ["run", "model", "model_version"], indirect=True + ) + def test_multiple_processes(self, container: neptune.Run, environment): + number_of_reinitialized = 10 + namespace = self.gen_key() + + container[f"{namespace}/{fake.unique.word()}"] = fake.color() + + with concurrent.futures.ProcessPoolExecutor(max_workers=3) as executor: + futures = [ + executor.submit( + store_in_container, + 
sys_id=container["sys/id"].fetch(), + container_type=container.container_type.value, + project=environment.project, + destination=f"{namespace}/{fake.unique.word()}", + ) + for _ in range(number_of_reinitialized) + ] + for future in concurrent.futures.as_completed(futures): + _ = future.result() + + container.sync() + + assert len(container[namespace].fetch()) == number_of_reinitialized + 1 + + @pytest.mark.parametrize( + "container", ["run", "model", "model_version"], indirect=True + ) + def test_multiple_threads(self, container: neptune.Run, environment): + number_of_reinitialized = 10 + namespace = self.gen_key() + + container[f"{namespace}/{fake.unique.word()}"] = fake.color() + + with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: + futures = [ + executor.submit( + store_in_container, + sys_id=container["sys/id"].fetch(), + container_type=container.container_type.value, + project=environment.project, + destination=f"{namespace}/{fake.unique.word()}", + ) + for _ in range(number_of_reinitialized) + ] + for future in concurrent.futures.as_completed(futures): + _ = future.result() + + container.sync() + + assert len(container[namespace].fetch()) == number_of_reinitialized + 1 diff --git a/e2e_tests/standard/test_multiple_runs.py b/e2e_tests/standard/test_multiple_runs.py deleted file mode 100644 index 15b5f21dd..000000000 --- a/e2e_tests/standard/test_multiple_runs.py +++ /dev/null @@ -1,113 +0,0 @@ -# -# Copyright (c) 2021, Neptune Labs Sp. z o.o. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -import random -import concurrent.futures - -import pytest -from faker import Faker -import neptune.new as neptune - -from e2e_tests.base import BaseE2ETest - -fake = Faker() - - -def store_in_run(run_short_id: str, project: str, destination: str): - reinitialized_run = neptune.init(run=run_short_id, project=project) - reinitialized_run[destination] = fake.color() - reinitialized_run.sync() - - -class TestMultipleRuns(BaseE2ETest): - @pytest.mark.parametrize("container", ["run"], indirect=True) - def test_multiple_runs_single(self, container: neptune.Run, environment): - # pylint: disable=protected-access,undefined-loop-variable - - number_of_reinitialized = 5 - namespace = fake.unique.word() - - reinitialized_runs = [ - neptune.init(run=container._short_id, project=environment.project) - for _ in range(number_of_reinitialized) - ] - - container[f"{namespace}/{fake.unique.word()}"] = fake.color() - container.sync() - - random.shuffle(reinitialized_runs) - for run in reinitialized_runs: - run[f"{namespace}/{fake.unique.word()}"] = fake.color() - - random.shuffle(reinitialized_runs) - for run in reinitialized_runs: - run.sync() - - container.sync() - - assert len(container[namespace].fetch()) == number_of_reinitialized + 1 - - @pytest.mark.skip(reason="no way of currently testing this") - @pytest.mark.parametrize("container", ["run"], indirect=True) - def test_multiple_runs_processes(self, container: neptune.Run, environment): - # pylint: disable=protected-access - - number_of_reinitialized = 10 - namespace = fake.unique.word() - - container[f"{namespace}/{fake.unique.word()}"] = fake.color() - - with concurrent.futures.ProcessPoolExecutor(max_workers=3) as executor: - futures = [ - executor.submit( - store_in_run, - container._short_id, - environment.project, - f"{namespace}/{fake.unique.word()}", - ) - for _ in range(number_of_reinitialized) - ] - for future 
in concurrent.futures.as_completed(futures): - _ = future.result() - - container.sync() - - assert len(container[namespace].fetch()) == number_of_reinitialized + 1 - - @pytest.mark.parametrize("container", ["run"], indirect=True) - def test_multiple_runs_thread(self, container: neptune.Run, environment): - # pylint: disable=protected-access - - number_of_reinitialized = 10 - namespace = fake.unique.word() - - container[f"{namespace}/{fake.unique.word()}"] = fake.color() - - with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: - futures = [ - executor.submit( - store_in_run, - container._short_id, - environment.project, - f"{namespace}/{fake.unique.word()}", - ) - for _ in range(number_of_reinitialized) - ] - for future in concurrent.futures.as_completed(futures): - _ = future.result() - - container.sync() - - assert len(container[namespace].fetch()) == number_of_reinitialized + 1 diff --git a/e2e_tests/standard/test_series.py b/e2e_tests/standard/test_series.py index 6ab54a2ab..e06c83cc3 100644 --- a/e2e_tests/standard/test_series.py +++ b/e2e_tests/standard/test_series.py @@ -17,19 +17,16 @@ import pytest from PIL import Image -from faker import Faker -from neptune.new.attribute_container import AttributeContainer +from neptune.new.metadata_containers import MetadataContainer -from e2e_tests.base import BaseE2ETest +from e2e_tests.base import BaseE2ETest, AVAILABLE_CONTAINERS, fake from e2e_tests.utils import generate_image, image_to_png, tmp_context -fake = Faker() - class TestSeries(BaseE2ETest): - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_log_numbers(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_log_numbers(self, container: MetadataContainer): key = self.gen_key() values = [random.random() for _ in range(50)] @@ -42,8 +39,8 @@ def test_log_numbers(self, container: AttributeContainer): fetched_values = 
container[key].fetch_values() assert list(fetched_values["value"]) == values - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_log_strings(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_log_strings(self, container: MetadataContainer): key = self.gen_key() values = [fake.word() for _ in range(50)] @@ -56,8 +53,8 @@ def test_log_strings(self, container: AttributeContainer): fetched_values = container[key].fetch_values() assert list(fetched_values["value"]) == values - @pytest.mark.parametrize("container", ["project", "run"], indirect=True) - def test_log_images(self, container: AttributeContainer): + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_log_images(self, container: MetadataContainer): key = self.gen_key() # images with size between 200KB - 12MB images = list(generate_image(size=2 ** n) for n in range(8, 12)) diff --git a/e2e_tests/standard/test_stage_transitions.py b/e2e_tests/standard/test_stage_transitions.py new file mode 100644 index 000000000..3d6056433 --- /dev/null +++ b/e2e_tests/standard/test_stage_transitions.py @@ -0,0 +1,56 @@ +# +# Copyright (c) 2021, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import pytest + +from neptune.new.metadata_containers import ModelVersion +from neptune.new.exceptions import NeptuneCannotChangeStageManually + +from e2e_tests.base import BaseE2ETest + + +class TestStageTransitions(BaseE2ETest): + @pytest.mark.parametrize("container", ["model_version"], indirect=True) + def test_transitions(self, container: ModelVersion): + container["a"] = 14 + + assert container["sys/stage"].fetch() == "none" + + container.change_stage("staging") + container.sync() + + assert container["sys/stage"].fetch() == "staging" + + container.change_stage("production") + container.sync() + + assert container["sys/stage"].fetch() == "production" + + container.change_stage("none") + container.sync() + + assert container["sys/stage"].fetch() == "none" + + @pytest.mark.parametrize("container", ["model_version"], indirect=True) + def test_fail_on_unknown_stage_value(self, container: ModelVersion): + with pytest.raises(ValueError): + container.change_stage("unknown") + container.sync() + + @pytest.mark.parametrize("container", ["model_version"], indirect=True) + def test_fail_on_manual(self, container: ModelVersion): + with pytest.raises(NeptuneCannotChangeStageManually): + container["sys/stage"] = "staging" + container.sync() diff --git a/e2e_tests/standard/test_sync.py b/e2e_tests/standard/test_sync.py index 6c0ea63e6..99dbc8512 100644 --- a/e2e_tests/standard/test_sync.py +++ b/e2e_tests/standard/test_sync.py @@ -13,139 +13,107 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import json import re +import json from pathlib import Path -import neptune.new as neptune +import pytest from click.testing import CliRunner -from faker import Faker + +import neptune.new as neptune +from neptune.new.exceptions import NeptuneException from neptune.new.sync import sync -from e2e_tests.base import BaseE2ETest -from e2e_tests.utils import DISABLE_SYSLOG_KWARGS, tmp_context +from e2e_tests.base import BaseE2ETest, fake, AVAILABLE_CONTAINERS +from e2e_tests.utils import ( + DISABLE_SYSLOG_KWARGS, + initialize_container, + reinitialize_container, + tmp_context, +) -fake = Faker() runner = CliRunner() class TestSync(BaseE2ETest): - SYNCHRONIZED_SYSID_RE = r"\w+/[\w-]+/([\w-]+)" - - def test_sync_run(self, environment): - custom_run_id = "-".join((fake.word() for _ in range(3))) + SYNCHRONIZED_SYSID_RE = r"[\w-]+/[\w-]+/([\w-]+)" + @pytest.mark.parametrize("container_type", AVAILABLE_CONTAINERS) + def test_sync_container(self, container_type, environment): with tmp_context() as tmp: - # with test values key = self.gen_key() - original_value = fake.word() - updated_value = fake.word() - - # init run - run = neptune.init( - custom_run_id=custom_run_id, - project=environment.project, - **DISABLE_SYSLOG_KWARGS, - ) - - def get_next_run(): - return neptune.init( - custom_run_id=custom_run_id, - project=environment.project, - **DISABLE_SYSLOG_KWARGS, + original_value = fake.unique.word() + updated_value = fake.unique.word() + + with initialize_container( + container_type=container_type, project=environment.project + ) as container: + # assign original value + container[key] = original_value + container.wait() + # pylint: disable=protected-access + container_id = container._id + container_sys_id = container._sys_id + + # manually add operations to queue + queue_dir = list( + Path(f"./.neptune/async/{container_type}__{container_id}/").glob( + "exec-*" ) + )[0] + with open( + queue_dir / "last_put_version", encoding="utf-8" + ) as last_put_version_f: + 
last_put_version = int(last_put_version_f.read()) + with open(queue_dir / "data-1.log", "a", encoding="utf-8") as queue_f: + queue_f.write( + json.dumps( + { + "obj": { + "type": "AssignString", + "path": key.split("/"), + "value": updated_value, + }, + "version": last_put_version + 1, + } + ) + ) + queue_f.write( + json.dumps( + { + "obj": { + "type": "CopyAttribute", + "path": ["copy"] + key.split("/"), + "container_id": container_id, + "container_type": container_type, + "source_path": key.split("/"), + "source_attr_name": "String", + }, + "version": last_put_version + 2, + } + ) + ) + with open( + queue_dir / "last_put_version", "w", encoding="utf-8" + ) as last_put_version_f: + last_put_version_f.write(str(last_put_version + 2)) + + with reinitialize_container( + container_sys_id, container_type, project=environment.project + ) as container: + # server should have the original value + assert container[key].fetch() == original_value + + # run neptune sync + result = runner.invoke(sync, ["--path", tmp]) + assert result.exit_code == 0 - self._test_sync( - exp=run, - get_next_exp=get_next_run, - path=tmp, - key=key, - original_value=original_value, - updated_value=updated_value, - ) - - def test_sync_project(self, environment): - with tmp_context() as tmp: - # with test values - key = f"{self.gen_key()}-" + "-".join((fake.word() for _ in range(3))) - original_value = fake.word() - updated_value = fake.word() - - # init run - project = neptune.init_project(name=environment.project) - - def get_next_project(): - return neptune.init_project(name=environment.project) - - self._test_sync( - exp=project, - get_next_exp=get_next_project, - path=tmp, - key=key, - original_value=original_value, - updated_value=updated_value, - ) - - @staticmethod - def _test_sync(exp, get_next_exp, path, key, original_value, updated_value): - # assign original value - exp[key] = original_value - exp.sync() - - # stop run - exp.stop() - - # pylint: disable=protected-access - queue_dir = 
list(Path(f"./.neptune/async/{exp._id}/").glob("exec-*"))[0] - with open( - queue_dir / "last_put_version", encoding="utf-8" - ) as last_put_version_f: - last_put_version = int(last_put_version_f.read()) - with open(queue_dir / "data-1.log", "a", encoding="utf-8") as queue_f: - queue_f.write( - json.dumps( - { - "obj": { - "type": "AssignString", - "path": key.split("/"), - "value": updated_value, - }, - "version": last_put_version + 1, - } - ) - ) - queue_f.write( - json.dumps( - { - "obj": { - "type": "CopyAttribute", - "path": ["copy"] + key.split("/"), - "container_id": exp._id, - "container_type": exp.container_type.value, - "source_path": key.split("/"), - "source_attr_name": "String", - }, - "version": last_put_version + 2, - } - ) - ) - with open( - queue_dir / "last_put_version", "w", encoding="utf-8" - ) as last_put_version_f: - last_put_version_f.write(str(last_put_version + 2)) - - # other exp should see only original value from server - exp2 = get_next_exp() - assert exp2[key].fetch() == original_value - - # run neptune sync - result = runner.invoke(sync, ["--path", path]) - assert result.exit_code == 0 - - # other exp should see updated value from server - exp3 = get_next_exp() - assert exp3[key].fetch() == updated_value - assert exp3["copy/" + key].fetch() == updated_value + with reinitialize_container( + container_sys_id, container_type, project=environment.project + ) as container: + # and we should get the updated value from server + assert container[key].fetch() == updated_value + assert container["copy/" + key].fetch() == updated_value def test_offline_sync(self, environment): with tmp_context() as tmp: @@ -167,10 +135,20 @@ def test_offline_sync(self, environment): result = runner.invoke(sync, ["--path", tmp, "-p", environment.project]) assert result.exit_code == 0 - # offline mode doesn't support custom_run_id, we'll have to parse sync output to determine short_id + # we'll have to parse sync output to determine short_id sys_id_found = 
re.search(self.SYNCHRONIZED_SYSID_RE, result.stdout) assert len(sys_id_found.groups()) == 1 sys_id = sys_id_found.group(1) - run2 = neptune.init(run=sys_id, project=environment.project) + run2 = neptune.init_run(run=sys_id, project=environment.project) assert run2[key].fetch() == val + + @pytest.mark.parametrize("container_type", ["model", "model_version", "project"]) + def test_cannot_offline_non_runs(self, environment, container_type): + with pytest.raises(NeptuneException) as e: + initialize_container( + container_type=container_type, + project=environment.project, + mode="offline", + ) + assert "can't be initialized in OFFLINE mode" in str(e.value) diff --git a/e2e_tests/utils.py b/e2e_tests/utils.py index 18d6f9739..27f4d802a 100644 --- a/e2e_tests/utils.py +++ b/e2e_tests/utils.py @@ -13,10 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__all__ = ["with_check_if_file_appears", "tmp_context", "a_project_name", "Environment"] +__all__ = [ + "with_check_if_file_appears", + "tmp_context", + "a_project_name", + "a_key", + "Environment", + "initialize_container", + "reinitialize_container", +] import io import os +import string import random import tempfile from datetime import datetime @@ -27,6 +36,8 @@ from PIL import Image from PIL.PngImagePlugin import PngImageFile +import neptune.new as neptune + def _remove_file_if_exists(filepath): try: @@ -85,11 +96,13 @@ def image_to_png(*, image: Image) -> PngImageFile: return PngImageFile(png_buf) +def a_key(): + return "".join(random.choices(string.ascii_uppercase, k=10)) + + def a_project_name(project_slug: str): project_name = f"e2e-{datetime.now().strftime('%Y%m%d-%H%M')}-{project_slug}" - project_key = "".join( - random.choices(population=project_slug.replace("-", ""), k=10) - ).upper() + project_key = a_key() return project_name, project_key @@ -98,3 +111,42 @@ def a_project_name(project_slug: str): "Environment", ["workspace", "project", 
"user_token", "admin_token", "admin", "user"], ) + + +def initialize_container(container_type, project, **extra_args): + if container_type == "project": + return neptune.init_project(name=project, **extra_args) + + if container_type == "run": + return neptune.init_run(project=project, **extra_args) + + if container_type == "model": + return neptune.init_model(key=a_key(), project=project, **extra_args) + + if container_type == "model_version": + model = neptune.init_model(key=a_key(), project=project, **extra_args) + model_sys_id = model["sys/id"].fetch() + model.stop() + + return neptune.init_model_version( + model=model_sys_id, project=project, **extra_args + ) + + raise NotImplementedError(container_type) + + +def reinitialize_container(sys_id: str, container_type: str, project: str): + if container_type == "project": + # exactly same as initialize_container(project), for convenience + return neptune.init_project(name=project) + + if container_type == "run": + return neptune.init_run(run=sys_id, project=project) + + if container_type == "model": + return neptune.init_model(model=sys_id, project=project) + + if container_type == "model_version": + return neptune.init_model_version(version=sys_id, project=project) + + raise NotImplementedError() diff --git a/neptune/new/__init__.py b/neptune/new/__init__.py index f1370c329..edb12be47 100644 --- a/neptune/new/__init__.py +++ b/neptune/new/__init__.py @@ -32,10 +32,11 @@ NeptuneUninitializedException, NeptunePossibleLegacyUsageException, ) -from neptune.new.run import Run -from neptune.new.internal.init_impl import ( - __version__, +from neptune.new.metadata_containers import Run +from neptune.new.internal.init import ( get_project, + init_model, + init_model_version, init_project, init_run, init, diff --git a/neptune/new/attributes/atoms/copiable_atom.py b/neptune/new/attributes/atoms/copiable_atom.py index 6456d2a8a..f950b53c0 100644 --- a/neptune/new/attributes/atoms/copiable_atom.py +++ 
b/neptune/new/attributes/atoms/copiable_atom.py @@ -33,14 +33,14 @@ def copy(self, value: ValueCopy, wait: bool = False): # pylint: disable=protected-access with self._container.lock(): source_path = value.source_handler._path - source_attr = value.source_handler._run.get_attribute(source_path) + source_attr = value.source_handler._get_attribute() self._enqueue_operation( CopyAttribute( self._path, - value.source_handler._container_id, - value.source_handler._container_type, - parse_path(source_path), - source_attr.__class__, + container_id=source_attr._container_id, + container_type=source_attr._container_type, + source_path=parse_path(source_path), + source_attr_cls=source_attr.__class__, ), wait, ) diff --git a/neptune/new/attributes/atoms/string.py b/neptune/new/attributes/atoms/string.py index 1da947537..bd0d27b51 100644 --- a/neptune/new/attributes/atoms/string.py +++ b/neptune/new/attributes/atoms/string.py @@ -24,7 +24,7 @@ from neptune.new.types.atoms.string import String as StringVal if typing.TYPE_CHECKING: - from neptune.new.attribute_container import AttributeContainer + from neptune.new.metadata_containers import MetadataContainer from neptune.new.internal.backends.neptune_backend import NeptuneBackend @@ -32,7 +32,7 @@ class String(CopiableAtom): MAX_VALUE_LENGTH = 16384 - def __init__(self, container: "AttributeContainer", path: typing.List[str]): + def __init__(self, container: "MetadataContainer", path: typing.List[str]): super().__init__(container, path) self._value_truncation_occurred = False diff --git a/neptune/new/attributes/attribute.py b/neptune/new/attributes/attribute.py index f0c18afb0..fe0cfb7f7 100644 --- a/neptune/new/attributes/attribute.py +++ b/neptune/new/attributes/attribute.py @@ -15,26 +15,26 @@ # from typing import List, TYPE_CHECKING +from neptune.new.exceptions import TypeDoesNotSupportAttributeException from neptune.new.internal.backends.neptune_backend import NeptuneBackend - from neptune.new.internal.operation import 
Operation from neptune.new.types.value_copy import ValueCopy if TYPE_CHECKING: - from neptune.new.attribute_container import AttributeContainer + from neptune.new.metadata_containers import MetadataContainer from neptune.new.internal.container_type import ContainerType class Attribute: supports_copy = False - def __init__(self, container: "AttributeContainer", path: List[str]): + def __init__(self, container: "MetadataContainer", path: List[str]): super().__init__() self._container = container self._path = path def __getattr__(self, attr): - raise AttributeError("{} has no attribute {}.".format(type(self), attr)) + raise TypeDoesNotSupportAttributeException(type_=type(self), attribute=attr) def _enqueue_operation(self, operation: Operation, wait: bool): # pylint: disable=protected-access diff --git a/neptune/new/attributes/constants.py b/neptune/new/attributes/constants.py index 039e0933f..5cf39a394 100644 --- a/neptune/new/attributes/constants.py +++ b/neptune/new/attributes/constants.py @@ -38,6 +38,7 @@ SYSTEM_STATE_ATTRIBUTE_PATH = f"{SYSTEM_ATTRIBUTE_SPACE}state" SYSTEM_TAGS_ATTRIBUTE_PATH = f"{SYSTEM_ATTRIBUTE_SPACE}tags" SYSTEM_FAILED_ATTRIBUTE_PATH = f"{SYSTEM_ATTRIBUTE_SPACE}failed" +SYSTEM_STAGE_ATTRIBUTE_PATH = f"{SYSTEM_ATTRIBUTE_SPACE}stage" SIGNAL_TYPE_STOP = "neptune/stop" SIGNAL_TYPE_ABORT = "neptune/abort" diff --git a/neptune/new/attributes/namespace.py b/neptune/new/attributes/namespace.py index c83dce49f..ab2aed59d 100644 --- a/neptune/new/attributes/namespace.py +++ b/neptune/new/attributes/namespace.py @@ -17,7 +17,7 @@ from typing import Any, Dict, TYPE_CHECKING, Iterator, List, Mapping, Union from neptune.new.attributes.attribute import Attribute -from neptune.new.internal.run_structure import ContainerStructure +from neptune.new.internal.container_structure import ContainerStructure from neptune.new.internal.utils.generic_attribute_mapper import ( atomic_attribute_types_map, NoValue, @@ -26,13 +26,13 @@ from neptune.new.types.namespace 
import Namespace as NamespaceVal if TYPE_CHECKING: - from neptune.new.attribute_container import AttributeContainer + from neptune.new.metadata_containers import MetadataContainer RunStructure = ContainerStructure # backwards compatibility class Namespace(Attribute, MutableMapping): - def __init__(self, container: "AttributeContainer", path: List[str]): + def __init__(self, container: "MetadataContainer", path: List[str]): Attribute.__init__(self, container, path) self._attributes = {} self._str_path = path_to_str(path) @@ -94,7 +94,7 @@ def fetch(self) -> dict: class NamespaceBuilder: - def __init__(self, container: "AttributeContainer"): + def __init__(self, container: "MetadataContainer"): self._run = container def __call__(self, path: List[str]) -> Namespace: diff --git a/neptune/new/attributes/series/string_series.py b/neptune/new/attributes/series/string_series.py index 295bdd243..96b5b61f5 100644 --- a/neptune/new/attributes/series/string_series.py +++ b/neptune/new/attributes/series/string_series.py @@ -27,7 +27,7 @@ from neptune.utils import split_to_chunks if TYPE_CHECKING: - from neptune.new.attribute_container import AttributeContainer + from neptune.new.metadata_containers import MetadataContainer Val = StringSeriesVal Data = str @@ -36,7 +36,7 @@ class StringSeries(Series[Val, Data], FetchableSeries[StringSeriesValues]): - def __init__(self, container: "AttributeContainer", path: List[str]): + def __init__(self, container: "MetadataContainer", path: List[str]): super().__init__(container, path) self._value_truncation_occurred = False diff --git a/neptune/new/attributes/utils.py b/neptune/new/attributes/utils.py index 359055afb..c0149efb8 100644 --- a/neptune/new/attributes/utils.py +++ b/neptune/new/attributes/utils.py @@ -36,7 +36,7 @@ from neptune.new.internal.backends.api_model import AttributeType if TYPE_CHECKING: - from neptune.new.attribute_container import AttributeContainer + from neptune.new.metadata_containers import MetadataContainer from 
neptune.new.attributes.attribute import Attribute _attribute_type_to_attr_class_map = { @@ -60,11 +60,11 @@ def create_attribute_from_type( attribute_type: AttributeType, - run: "AttributeContainer", + container: "MetadataContainer", path: List[str], ) -> "Attribute": try: - return _attribute_type_to_attr_class_map[attribute_type](run, path) + return _attribute_type_to_attr_class_map[attribute_type](container, path) except KeyError: raise InternalClientError(f"Unexpected type: {attribute_type}") diff --git a/neptune/new/exceptions.py b/neptune/new/exceptions.py index cf4ee6c65..df56c5b7d 100644 --- a/neptune/new/exceptions.py +++ b/neptune/new/exceptions.py @@ -23,6 +23,7 @@ from neptune.new.envs import CUSTOM_RUN_ID_ENV_NAME from neptune.new.internal.backends.api_model import Project, Workspace from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.id_formats import QualifiedName from neptune.new.internal.utils import replace_patch_version @@ -71,6 +72,24 @@ def __str__(self): return self._msg +class TypeDoesNotSupportAttributeException(NeptuneException, AttributeError): + def __init__(self, type_, attribute): + message = """ +{h1} +----TypeDoesNotSupportAttributeException---------------------------------------- +{end} +{type} has no attribute {attribute}. 
+ +{correct}Need help?{end}-> https://docs.neptune.ai/getting-started/getting-help +""" + self._msg = message.format(type=type_, attribute=attribute, **STYLES) + super().__init__(self._msg) + + def __str__(self): + # required because of overriden `__str__` in `KeyError` + return self._msg + + class MalformedOperation(NeptuneException): pass @@ -126,6 +145,60 @@ def __init__(self, status, response): super().__init__(message.format(status=status, response=response, **STYLES)) +class MetadataContainerNotFound(NeptuneException): + container_id: str + container_type: ContainerType + + def __init__(self, container_id: str, container_type: Optional[ContainerType]): + self.container_id = container_id + self.container_type = container_type + container_type_str = ( + container_type.value.capitalize() if container_type else "object" + ) + super().__init__("{} {} not found.".format(container_type_str, container_id)) + + @classmethod + def of_container_type( + cls, container_type: Optional[ContainerType], container_id: str + ): + if container_type is None: + return MetadataContainerNotFound( + container_id=container_id, container_type=None + ) + elif container_type == ContainerType.PROJECT: + return ProjectNotFound(project_id=container_id) + elif container_type == ContainerType.RUN: + return RunNotFound(run_id=container_id) + elif container_type == ContainerType.MODEL: + return ModelNotFound(model_id=container_id) + elif container_type == ContainerType.MODEL_VERSION: + return ModelVersionNotFound(model_version_id=container_id) + else: + raise InternalClientError(f"Unexpected ContainerType: {container_type}") + + +class ProjectNotFound(MetadataContainerNotFound): + def __init__(self, project_id: str): + super().__init__(container_id=project_id, container_type=ContainerType.PROJECT) + + +class RunNotFound(MetadataContainerNotFound): + def __init__(self, run_id: str): + super().__init__(container_id=run_id, container_type=ContainerType.RUN) + + +class 
ModelNotFound(MetadataContainerNotFound): + def __init__(self, model_id: str): + super().__init__(container_id=model_id, container_type=ContainerType.MODEL) + + +class ModelVersionNotFound(MetadataContainerNotFound): + def __init__(self, model_version_id: str): + super().__init__( + container_id=model_version_id, container_type=ContainerType.MODEL_VERSION + ) + + class ExceptionWithProjectsWorkspacesListing(NeptuneException): def __init__( self, @@ -176,10 +249,30 @@ def __init__( ) -class ProjectNotFound(ExceptionWithProjectsWorkspacesListing): +class ContainerUUIDNotFound(NeptuneException): + container_id: str + container_type: ContainerType + + def __init__(self, container_id: str, container_type: ContainerType): + self.container_id = container_id + self.container_type = container_type + super().__init__( + "{} with ID {} not found. Could be deleted.".format( + container_type.value.capitalize(), container_id + ) + ) + + +# for backward compatibility +RunUUIDNotFound = ContainerUUIDNotFound + + +class ProjectNotFoundWithSuggestions( + ExceptionWithProjectsWorkspacesListing, ProjectNotFound +): def __init__( self, - project_id: str, + project_id: QualifiedName, available_projects: List[Project] = (), available_workspaces: List[Workspace] = (), ): @@ -270,51 +363,6 @@ def __init__( ) -class RunNotFound(NeptuneException): - def __init__(self, run_id: str) -> None: - super().__init__("Run {} not found.".format(run_id)) - - -class ContainerUUIDNotFound(NeptuneException): - container_id: str - container_type: ContainerType - - def __init__(self, container_id: str, container_type: ContainerType): - self.container_id = container_id - self.container_type = container_type - super().__init__( - "{} with ID {} not found. 
Could be deleted.".format( - container_type.value.capitalize(), container_id - ) - ) - - -def raise_container_not_found( - container_id: str, container_type: ContainerType, from_exception: Exception = None -): - if container_type == ContainerType.RUN: - error_class = RunUUIDNotFound - elif container_type == ContainerType.PROJECT: - error_class = ProjectUUIDNotFound - else: - raise InternalClientError(f"Unknown container_type: {container_type}") - - if from_exception: - raise error_class(container_id) from from_exception - else: - raise error_class(container_id) - - -class RunUUIDNotFound(ContainerUUIDNotFound): - def __init__(self, container_id: str): - super().__init__(container_id, container_type=ContainerType.RUN) - - -class ProjectUUIDNotFound(ContainerUUIDNotFound): - def __init__(self, container_id: str): - super().__init__(container_id, container_type=ContainerType.PROJECT) - - class InactiveContainerException(NeptuneException): resume_info: str @@ -324,10 +372,13 @@ def __init__(self, container_type: ContainerType, label: str): ----{cls}---------------------------------------- {end} It seems you are trying to log (or fetch) metadata to a {container_type} that was stopped ({label}). + What should I do?{resume_info} + You may also want to check the following docs pages: - https://docs.neptune.ai/api-reference/{container_type}#.stop - https://docs.neptune.ai/you-should-know/connection-modes + {correct}Need help?{end}-> https://docs.neptune.ai/getting-started/getting-help """ super().__init__( @@ -345,19 +396,43 @@ class InactiveRunException(InactiveContainerException): resume_info = """ - Resume the run to continue logging to it: https://docs.neptune.ai/how-to-guides/neptune-api/resume-run#how-to-resume-run - - Don't invoke `stop()` on a {container_type} that you want to access. If you want to stop monitoring only, - you can resume a {container_type} in read-only mode: + - Don't invoke `stop()` on a run that you want to access. 
If you want to stop monitoring only, + you can resume a run in read-only mode: https://docs.neptune.ai/you-should-know/connection-modes#read-only""" def __init__(self, label: str): super().__init__(label=label, container_type=ContainerType.RUN) +class InactiveModelException(InactiveContainerException): + resume_info = """ + - Resume the model to continue logging to it: + https://docs.neptune.ai/api-reference/neptune#.init_model + - Don't invoke `stop()` on a model that you want to access. If you want to stop monitoring only, + you can resume a model in read-only mode: + https://docs.neptune.ai/you-should-know/connection-modes#read-only""" + + def __init__(self, label: str): + super().__init__(label=label, container_type=ContainerType.MODEL) + + +class InactiveModelVersionException(InactiveContainerException): + resume_info = """ + - Resume the model version to continue logging to it: + https://docs.neptune.ai/api-reference/neptune#.init_model_version + - Don't invoke `stop()` on a model version that you want to access. 
If you want to stop monitoring only, + you can resume a model version in read-only mode: + https://docs.neptune.ai/you-should-know/connection-modes#read-only""" + + def __init__(self, label: str): + super().__init__(label=label, container_type=ContainerType.MODEL_VERSION) + + class InactiveProjectException(InactiveContainerException): resume_info = """ - Initialize connection to the project again to continue logging to it: https://docs.neptune.ai/api-reference/neptune#.init_project - - Don't invoke `stop()` on a {container_type} that you want to access.""" + - Don't invoke `stop()` on a project that you want to access.""" def __init__(self, label: str): super().__init__(label=label, container_type=ContainerType.PROJECT) @@ -455,15 +530,18 @@ def __init__(self): super().__init__("Cannot synchronize offline runs without a project.") -class NeedExistingRunForReadOnlyMode(NeptuneException): - def __init__(self): +class NeedExistingExperimentForReadOnlyMode(NeptuneException): + container_type: ContainerType + callback_name: str + + def __init__(self, container_type: ContainerType, callback_name: str): message = """ {h1} -----NeedExistingRunForReadOnlyMode----------------------------------------- +----{class_name}----------------------------------------- {end} -Read-only mode can be used only with an existing run. +Read-only mode can be used only with an existing {container_type}. -Parameter {python}run{end} of {python}neptune.init(){end} must be provided and reference +Parameter {python}{container_type}{end} of {python}{callback_name}{end} must be provided and reference an existing run when using {python}mode="read-only"{end}. 
You may also want to check the following docs pages: @@ -472,10 +550,45 @@ def __init__(self): {correct}Need help?{end}-> https://docs.neptune.ai/getting-started/getting-help """ - super().__init__(message.format(**STYLES)) + self.container_type = container_type + self.callback_name = callback_name + super().__init__( + message.format( + class_name=type(self).__name__, + container_type=self.container_type.value, + callback_name=self.callback_name, + **STYLES, + ) + ) + + +class NeedExistingRunForReadOnlyMode(NeedExistingExperimentForReadOnlyMode): + def __init__(self): + super().__init__( + container_type=ContainerType.RUN, callback_name="neptune.init_run" + ) + + +class NeedExistingModelForReadOnlyMode(NeedExistingExperimentForReadOnlyMode): + def __init__(self): + super().__init__( + container_type=ContainerType.MODEL, callback_name="neptune.init_model" + ) -class NeptuneRunResumeAndCustomIdCollision(NeptuneException): +class NeedExistingModelVersionForReadOnlyMode(NeedExistingExperimentForReadOnlyMode): + def __init__(self): + super().__init__( + container_type=ContainerType.MODEL_VERSION, + callback_name="neptune.init_model_version", + ) + + +class NeptuneWrongInitParametersException(NeptuneException): + pass + + +class NeptuneRunResumeAndCustomIdCollision(NeptuneWrongInitParametersException): def __init__(self): message = """ {h1} @@ -527,6 +640,31 @@ def __init__( ) +class NeptuneMissingRequiredInitParameter(NeptuneWrongInitParametersException): + def __init__( + self, + called_function: str, + parameter_name: str, + ): + message = """ +{h1} +----NeptuneMissingRequiredInitParameter--------------------------------------- +{end} +{python}neptune.{called_function}(){end} invocation was missing {python}{parameter_name}{end}. 
+If you want to create a new object using {python}{called_function}{end}, {python}{parameter_name}{end} is required: +https://docs.neptune.ai/api-reference/neptune#.{called_function} + +{correct}Need help?{end}-> https://docs.neptune.ai/getting-started/getting-help +""" + super().__init__( + message.format( + called_function=called_function, + parameter_name=parameter_name, + **STYLES, + ) + ) + + class CannotResolveHostname(NeptuneException): def __init__(self, host): message = """ @@ -695,7 +833,11 @@ def __init__(self): super().__init__(message.format(**STYLES)) -class NeptuneOfflineModeFetchException(NeptuneException): +class NeptuneOfflineModeException(NeptuneException): + pass + + +class NeptuneOfflineModeFetchException(NeptuneOfflineModeException): def __init__(self): message = """ {h1} @@ -715,13 +857,54 @@ def __init__(self): super().__init__(message.format(**STYLES)) +class NeptuneOfflineModeChangeStageException(NeptuneOfflineModeException): + def __init__(self): + message = """ +{h1} +----NeptuneOfflineModeChangeStageException--------------------------------------- +{end} +You cannot change the stage of the model version while in OFFLINE mode. +""" + super().__init__(message.format(**STYLES)) + + +class NeptuneProtectedPathException(NeptuneException): + extra_info = "" + + def __init__(self, path: str): + message = """ +{h1} +----NeptuneProtectedPathException---------------------------------------------- +{end} +Field {path} cannot be changed directly. 
+{extra_info} + +{correct}Need help?{end}-> https://docs.neptune.ai/getting-started/getting-help +""" + self._path = path + super().__init__( + message.format( + path=path, + extra_info=self.extra_info.format(**STYLES), + **STYLES, + ) + ) + + +class NeptuneCannotChangeStageManually(NeptuneProtectedPathException): + extra_info = """ +If you want to change the stage of the model version, +use the {python}.change_stage(){end} function: + {python}model_version.change_stage("staging"){end}""" + + class OperationNotSupported(NeptuneException): def __init__(self, message: str): super().__init__(f"Operation not supported: {message}") class NeptuneLegacyProjectException(NeptuneException): - def __init__(self, project: str): + def __init__(self, project: QualifiedName): message = """ {h1} ----NeptuneLegacyProjectException--------------------------------------------------------- @@ -891,7 +1074,7 @@ def __init__(self, matplotlib_version, plotly_version): ) -class NeptunePossibleLegacyUsageException(NeptuneException): +class NeptunePossibleLegacyUsageException(NeptuneWrongInitParametersException): def __init__(self): message = """ {h1} @@ -1062,3 +1245,26 @@ def __init__(self, missing_feature): """ self.message = message.format(missing_feature=missing_feature, **STYLES) super().__init__(message) + + +class NeptuneObjectCreationConflict(NeptuneException): + pass + + +class NeptuneModelKeyAlreadyExistsError(NeptuneObjectCreationConflict): + def __init__(self, model_key, models_tab_url): + message = """ +{h1} +----NeptuneModelKeyAlreadyExistsError--------------------------------------------------- +{end} +A model with the provided key ({model_key}) already exists in this project. A model key has to be unique +within the project. 
+ +You can check all of your models in the project on the Models page: +{models_tab_url} + +{correct}Need help?{end}-> https://docs.neptune.ai/getting-started/getting-help +""" + super().__init__( + message.format(model_key=model_key, models_tab_url=models_tab_url, **STYLES) + ) diff --git a/neptune/new/handler.py b/neptune/new/handler.py index a98acbcc4..23b2865fc 100644 --- a/neptune/new/handler.py +++ b/neptune/new/handler.py @@ -13,16 +13,27 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from functools import wraps from typing import Optional, TYPE_CHECKING, Union, Iterable, List + +# backwards compatibility +# pylint: disable=unused-import +from neptune.new.exceptions import NeptuneException + from neptune.new.attributes import File from neptune.new.attributes.atoms.artifact import Artifact +from neptune.new.attributes.constants import SYSTEM_STAGE_ATTRIBUTE_PATH from neptune.new.attributes.file_set import FileSet +from neptune.new.attributes.namespace import Namespace from neptune.new.attributes.series import FileSeries from neptune.new.attributes.series.float_series import FloatSeries from neptune.new.attributes.series.string_series import StringSeries from neptune.new.attributes.sets.string_set import StringSet -from neptune.new.exceptions import NeptuneException +from neptune.new.exceptions import ( + MissingFieldException, + NeptuneCannotChangeStageManually, +) from neptune.new.internal.artifacts.types import ArtifactFileData from neptune.new.internal.utils import ( verify_type, @@ -38,43 +49,60 @@ from neptune.new.types.value_copy import ValueCopy if TYPE_CHECKING: - from neptune.new.run import Run + from neptune.new.metadata_containers import MetadataContainer + + +def validate_path_not_protected(target_path: str, handler: "Handler"): + # pylint: disable=protected-access + path_protection_exception = handler._PROTECTED_PATHS.get(target_path) + if path_protection_exception: + raise 
path_protection_exception(target_path) + + +def check_protected_paths(fun): + @wraps(fun) + def inner_fun(self: "Handler", *args, **kwargs): + # pylint: disable=protected-access + validate_path_not_protected(self._path, self) + return fun(self, *args, **kwargs) + + return inner_fun class Handler: - def __init__(self, run: "Run", path: str): + # paths which can't be modified by client directly + _PROTECTED_PATHS = { + SYSTEM_STAGE_ATTRIBUTE_PATH: NeptuneCannotChangeStageManually, + } + + def __init__(self, container: "MetadataContainer", path: str): super().__init__() - self._run = run + self._container = container self._path = path def __repr__(self): - attr = self._run.get_attribute(self._path) + attr = self._container.get_attribute(self._path) formal_type = type(attr).__name__ if attr else "Unassigned" return f'<{formal_type} field at "{self._path}">' def _ipython_key_completions_(self): # pylint: disable=protected-access - return self._run._get_subpath_suggestions(path_prefix=self._path) + return self._container._get_subpath_suggestions(path_prefix=self._path) def __getitem__(self, path: str) -> "Handler": - return Handler(self._run, join_paths(self._path, path)) + return Handler(self._container, join_paths(self._path, path)) def __setitem__(self, key: str, value) -> None: self[key].assign(value) - def __getattr__(self, attribute_name): - attr = self._run.get_attribute(self._path) - if attr: - return getattr(attr, attribute_name) - else: - raise AttributeError(f"No such method '{attribute_name}'.") - - def __getattribute__(self, attribute_name): - _docstring_attrs = super().__getattribute__("DOCSTRING_ATTRIBUTES") - if attribute_name in _docstring_attrs: - raise AttributeError(f"No such method '{attribute_name}'.") - return super().__getattribute__(attribute_name) + def _get_attribute(self): + """Returns Attribute defined in `self._path` or throws MissingFieldException""" + attr = self._container.get_attribute(self._path) + if attr is None: + raise 
MissingFieldException(self._path) + return attr + @check_protected_paths def assign(self, value, wait: bool = False) -> None: """Assigns the provided value to the field. @@ -116,15 +144,16 @@ def assign(self, value, wait: bool = False) -> None: .. _Field types docs page: https://docs.neptune.ai/api-reference/field-types """ - with self._run.lock(): - attr = self._run.get_attribute(self._path) + with self._container.lock(): + attr = self._container.get_attribute(self._path) if attr: if isinstance(value, Handler): value = ValueCopy(value) attr.process_assignment(value, wait) else: - self._run.define(self._path, value, wait) + self._container.define(self._path, value, wait) + @check_protected_paths def upload(self, value, wait: bool = False) -> None: """Uploads provided file under specified field path. @@ -158,13 +187,14 @@ def upload(self, value, wait: bool = False) -> None: """ value = FileVal.create_from(value) - with self._run.lock(): - attr = self._run.get_attribute(self._path) + with self._container.lock(): + attr = self._container.get_attribute(self._path) if not attr: - attr = File(self._run, parse_path(self._path)) - self._run.set_attribute(self._path, attr) + attr = File(self._container, parse_path(self._path)) + self._container.set_attribute(self._path, attr) attr.upload(value, wait) + @check_protected_paths def upload_files( self, value: Union[str, Iterable[str]], wait: bool = False ) -> None: @@ -173,13 +203,14 @@ def upload_files( else: verify_type("value", value, str) - with self._run.lock(): - attr = self._run.get_attribute(self._path) + with self._container.lock(): + attr = self._container.get_attribute(self._path) if not attr: - attr = FileSet(self._run, parse_path(self._path)) - self._run.set_attribute(self._path, attr) + attr = FileSet(self._container, parse_path(self._path)) + self._container.set_attribute(self._path, attr) attr.upload_files(value, wait) + @check_protected_paths def log( self, value, @@ -215,8 +246,8 @@ def log( 
verify_type("step", step, (int, float, type(None))) verify_type("timestamp", timestamp, (int, float, type(None))) - with self._run.lock(): - attr = self._run.get_attribute(self._path) + with self._container.lock(): + attr = self._container.get_attribute(self._path) if not attr: if is_collection(value): if value: @@ -229,23 +260,24 @@ def log( first_value = value if is_float(first_value): - attr = FloatSeries(self._run, parse_path(self._path)) + attr = FloatSeries(self._container, parse_path(self._path)) elif is_string(first_value): - attr = StringSeries(self._run, parse_path(self._path)) + attr = StringSeries(self._container, parse_path(self._path)) elif FileVal.is_convertable(first_value): - attr = FileSeries(self._run, parse_path(self._path)) + attr = FileSeries(self._container, parse_path(self._path)) elif is_float_like(first_value): - attr = FloatSeries(self._run, parse_path(self._path)) + attr = FloatSeries(self._container, parse_path(self._path)) elif is_string_like(first_value): - attr = StringSeries(self._run, parse_path(self._path)) + attr = StringSeries(self._container, parse_path(self._path)) else: raise TypeError( "Value of unsupported type {}".format(type(first_value)) ) - self._run.set_attribute(self._path, attr) + self._container.set_attribute(self._path, attr) attr.log(value, step=step, timestamp=timestamp, wait=wait, **kwargs) + @check_protected_paths def add(self, values: Union[str, Iterable[str]], wait: bool = False) -> None: """Adds the provided tag or tags to the run's tags. 
@@ -263,34 +295,35 @@ def add(self, values: Union[str, Iterable[str]], wait: bool = False) -> None: https://docs.neptune.ai/api-reference/field-types#.add """ verify_type("values", values, (str, Iterable)) - with self._run.lock(): - attr = self._run.get_attribute(self._path) + with self._container.lock(): + attr = self._container.get_attribute(self._path) if not attr: - attr = StringSet(self._run, parse_path(self._path)) - self._run.set_attribute(self._path, attr) + attr = StringSet(self._container, parse_path(self._path)) + self._container.set_attribute(self._path, attr) attr.add(values, wait) + @check_protected_paths def pop(self, path: str = None, wait: bool = False) -> None: - if path: - verify_type("path", path, str) - self._run.pop(join_paths(self._path, path), wait) - else: - self._run.pop(self._path, wait) - - # Following attributes are implemented only for docstring hints and autocomplete - DOCSTRING_ATTRIBUTES = [ - "remove", - "clear", - "fetch", - "fetch_last", - "fetch_values", - "delete_files", - "download", - "download_last", - "fetch_hash", - "fetch_files_list", - ] + # pylint: disable=protected-access + with self._container.lock(): + handler = self + if path: + verify_type("path", path, str) + handler = self[path] + path = join_paths(self._path, path) + # extra check: check_protected_paths decorator does not catch flow with non-null path + validate_path_not_protected(path, self) + else: + path = self._path + attribute = self._container.get_attribute(path) + if isinstance(attribute, Namespace): + for child_path in list(attribute): + handler.pop(child_path, wait) + else: + self._container._pop_impl(parse_path(path), wait) + + @check_protected_paths def remove(self, values: Union[str, Iterable[str]], wait: bool = False) -> None: """Removes the provided tag or tags from the set. @@ -305,8 +338,9 @@ def remove(self, values: Union[str, Iterable[str]], wait: bool = False) -> None: .. 
_remove docs page: https://docs.neptune.ai/api-reference/field-types#.remove """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr(function_name="remove", values=values, wait=wait) + @check_protected_paths def clear(self, wait: bool = False): """Removes all tags from the `StringSet`. @@ -320,7 +354,7 @@ def clear(self, wait: bool = False): .. _clear docs page: https://docs.neptune.ai/api-reference/field-types#.clear """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr(function_name="clear", wait=wait) def fetch(self): """Fetches fields value or in case of a namespace fetches values of all non-File Atom fields as a dictionary. @@ -340,7 +374,7 @@ def fetch(self): .. _Field types docs page: https://docs.neptune.ai/api-reference/field-types """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr(function_name="fetch") def fetch_last(self): """Fetches last value stored in the series from Neptune servers. @@ -355,7 +389,7 @@ def fetch_last(self): .. _Field types docs page: https://docs.neptune.ai/api-reference/field-types """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr(function_name="fetch_last") def fetch_values(self, include_timestamp: Optional[bool] = True): """Fetches all values stored in the series from Neptune servers. @@ -374,8 +408,11 @@ def fetch_values(self, include_timestamp: Optional[bool] = True): .. _Field types docs page: https://docs.neptune.ai/api-reference/field-types """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr( + function_name="fetch_values", include_timestamp=include_timestamp + ) + @check_protected_paths def delete_files( self, paths: Union[str, Iterable[str]], wait: bool = False ) -> None: @@ -395,8 +432,11 @@ def delete_files( .. 
_delete_files docs page: https://docs.neptune.ai/api-reference/field-types#.delete_files """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr( + function_name="delete_files", paths=paths, wait=wait + ) + @check_protected_paths def download(self, destination: str = None) -> None: """Downloads the stored file or files to the working directory or specified destination. @@ -417,7 +457,9 @@ def download(self, destination: str = None) -> None: .. _Field types docs page: https://docs.neptune.ai/api-reference/field-types """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr( + function_name="download", destination=destination + ) def download_last(self, destination: str = None) -> None: """Downloads the stored file or files to the working directory or specified destination. @@ -435,7 +477,9 @@ def download_last(self, destination: str = None) -> None: .. _download_last docs page: https://docs.neptune.ai/api-reference/field-types#.download_last """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr( + function_name="download_last", destination=destination + ) def fetch_hash(self) -> str: """Fetches the hash of an artifact. @@ -443,7 +487,7 @@ def fetch_hash(self) -> str: You may also want to check `fetch_hash docs page`_. https://docs.neptune.ai/api-reference/field-types#.fetch_hash """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr(function_name="fetch_hash") def fetch_files_list(self) -> List[ArtifactFileData]: """Fetches the list of files in an artifact and their metadata. @@ -451,8 +495,12 @@ def fetch_files_list(self) -> List[ArtifactFileData]: You may also want to check `fetch_files_list docs page`_. 
https://docs.neptune.ai/api-reference/field-types#.fetch_files_list """ - raise NeptuneException("Should be never called.") + return self._pass_call_to_attr(function_name="fetch_files_list") + + def _pass_call_to_attr(self, function_name, **kwargs): + return getattr(self._get_attribute(), function_name)(**kwargs) + @check_protected_paths def track_files( self, path: str, destination: str = None, wait: bool = False ) -> None: @@ -461,12 +509,12 @@ def track_files( You may also want to check `track_files docs page`_. https://docs.neptune.ai/api-reference/field-types#.track_files """ - with self._run.lock(): - attr = self._run.get_attribute(self._path) + with self._container.lock(): + attr = self._container.get_attribute(self._path) if not attr: - attr = Artifact(self._run, parse_path(self._path)) + attr = Artifact(self._container, parse_path(self._path)) - self._run.set_attribute(self._path, attr) + self._container.set_attribute(self._path, attr) attr.track_files(path=path, destination=destination, wait=wait) diff --git a/neptune/new/internal/backends/api_model.py b/neptune/new/internal/backends/api_model.py index 42acf66c3..57ca8b2ab 100644 --- a/neptune/new/internal/backends/api_model.py +++ b/neptune/new/internal/backends/api_model.py @@ -20,27 +20,43 @@ from packaging import version +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.id_formats import UniqueId, SysId + +@dataclass class Project: - def __init__(self, _id: str, name: str, workspace: str): - self.id = _id - self.name = name - self.workspace = workspace + id: UniqueId + name: str + workspace: str + sys_id: SysId +@dataclass class Workspace: - def __init__(self, _id: str, name: str): - self.id = _id - self.name = name + id: UniqueId + name: str @dataclass -class ApiRun: - id: str - short_id: str +class ApiExperiment: + id: UniqueId + type: ContainerType + sys_id: SysId workspace: str project_name: str - trashed: bool + trashed: bool = False + + @classmethod + def 
from_experiment(cls, response_exp): + return cls( + id=response_exp.id, + type=ContainerType.from_api(response_exp.type), + sys_id=response_exp.shortId, + workspace=response_exp.organizationName, + project_name=response_exp.projectName, + trashed=response_exp.trashed, + ) class OptionalFeatures: diff --git a/neptune/new/internal/backends/hosted_neptune_backend.py b/neptune/new/internal/backends/hosted_neptune_backend.py index c374ba3ee..7c6cb5a9e 100644 --- a/neptune/new/internal/backends/hosted_neptune_backend.py +++ b/neptune/new/internal/backends/hosted_neptune_backend.py @@ -15,9 +15,15 @@ # import logging import re -from typing import Any, Dict, Iterable, List, Optional, Tuple, TYPE_CHECKING - -from bravado.exception import HTTPNotFound, HTTPPaymentRequired, HTTPUnprocessableEntity +import typing +from typing import Any, Dict, Iterable, List, Optional, Tuple, TYPE_CHECKING, Union + +from bravado.exception import ( + HTTPNotFound, + HTTPPaymentRequired, + HTTPUnprocessableEntity, + HTTPConflict, +) from neptune.new.exceptions import ( ArtifactNotFoundException, @@ -31,12 +37,14 @@ NeptuneLimitExceedException, ProjectNameCollision, ProjectNotFound, - RunNotFound, - raise_container_not_found, + MetadataContainerNotFound, + ProjectNotFoundWithSuggestions, + ContainerUUIDNotFound, + NeptuneObjectCreationConflict, ) from neptune.new.internal.artifacts.types import ArtifactFileData from neptune.new.internal.backends.api_model import ( - ApiRun, + ApiExperiment, ArtifactAttribute, Attribute, AttributeType, @@ -95,6 +103,7 @@ ) from neptune.new.internal.container_type import ContainerType from neptune.new.internal.credentials import Credentials +from neptune.new.internal.id_formats import QualifiedName, UniqueId from neptune.new.internal.operation import ( Operation, TrackFilesToArtifact, @@ -103,7 +112,7 @@ UploadFileSet, DeleteAttribute, ) -from neptune.new.internal.utils import base64_decode, verify_type +from neptune.new.internal.utils import base64_decode from 
neptune.new.internal.utils.generic_attribute_mapper import ( map_attribute_result_to_value, ) @@ -112,6 +121,7 @@ from neptune.new.types.atoms import GitRef from neptune.new.version import version as neptune_client_version from neptune.patterns import PROJECT_QUALIFIED_NAME_PATTERN +from neptune.new.internal.backends.nql import NQLQuery if TYPE_CHECKING: from bravado.requests_client import RequestsClient @@ -168,9 +178,7 @@ def websockets_factory( ) @with_api_exceptions_handler - def get_project(self, project_id: str) -> Project: - verify_type("project_id", project_id, str) - + def get_project(self, project_id: QualifiedName) -> Project: project_spec = re.search(PROJECT_QUALIFIED_NAME_PATTERN, project_id) workspace, name = project_spec["workspace"], project_spec["project"] @@ -191,7 +199,7 @@ def get_project(self, project_id: str) -> Project: project_id=project_id, available_projects=available_projects ) else: - raise ProjectNotFound( + raise ProjectNotFoundWithSuggestions( project_id=project_id, available_projects=self.get_available_projects(), available_workspaces=self.get_available_workspaces(), @@ -205,7 +213,12 @@ def get_project(self, project_id: str) -> Project: project_version = project.version if hasattr(project, "version") else 1 if project_version < 2: raise NeptuneLegacyProjectException(project_id) - return Project(project.id, project.name, project.organizationName) + return Project( + id=project.id, + name=project.name, + workspace=project.organizationName, + sys_id=project.projectKey, + ) except HTTPNotFound: available_workspaces = self.get_available_workspaces() @@ -213,13 +226,13 @@ def get_project(self, project_id: str) -> Project: filter(lambda aw: aw.name == workspace, available_workspaces) ): # Could not found specified workspace, forces listing all projects - raise ProjectNotFound( + raise ProjectNotFoundWithSuggestions( project_id=project_id, available_projects=self.get_available_projects(), available_workspaces=available_workspaces, ) else: - 
@with_api_exceptions_handler
def get_metadata_container(
    self,
    container_id: Union[UniqueId, QualifiedName],
    expected_container_type: typing.Optional[ContainerType],
) -> ApiExperiment:
    """Fetch a metadata container and optionally verify its type.

    Args:
        container_id: Identifier passed to the ``getExperiment`` endpoint as
            ``experimentId``.
        expected_container_type: When not ``None``, the type the fetched
            container must have.

    Returns:
        The fetched container converted with ``ApiExperiment.from_experiment``.

    Raises:
        MetadataContainerNotFound: If the backend answers with HTTP 404, or
            if the fetched container's type differs from
            ``expected_container_type``.
    """
    # Keep only the HTTP call inside ``try`` so the 404 handler cannot
    # accidentally swallow anything raised by the checks below.
    try:
        experiment = (
            self.leaderboard_client.api.getExperiment(
                experimentId=container_id,
                **DEFAULT_REQUEST_KWARGS,
            )
            .response()
            .result
        )
    except HTTPNotFound as e:
        # Chain the original HTTP error, as the other handlers in this
        # backend do, so the root cause stays visible in tracebacks.
        raise MetadataContainerNotFound.of_container_type(
            container_type=expected_container_type, container_id=container_id
        ) from e

    if (
        expected_container_type is not None
        and ContainerType.from_api(experiment.type) != expected_container_type
    ):
        # A container of a different type is reported as "not found".
        raise MetadataContainerNotFound.of_container_type(
            container_type=expected_container_type, container_id=container_id
        )

    return ApiExperiment.from_experiment(experiment)
@with_api_exceptions_handler
def create_model(self, project_id: UniqueId, key: str = "") -> ApiExperiment:
    """Create a model container directly under a project.

    Decorated with ``with_api_exceptions_handler`` like ``create_run``, so
    errors raised by the underlying API call go through the same handling.

    Args:
        project_id: Project the model belongs to; also sent as the parent id.
        key: Value sent to the backend as the ``key`` creation parameter.

    Returns:
        The ``ApiExperiment`` produced by ``_create_experiment``.
    """
    return self._create_experiment(
        project_id=project_id,
        parent_id=project_id,
        container_type=ContainerType.MODEL,
        additional_params={"key": key},
    )
self.leaderboard_client.api.createExperiment(**kwargs).response().result ) - return ApiRun( - run.id, run.shortId, run.organizationName, run.projectName, run.trashed - ) + return ApiExperiment.from_experiment(experiment) except HTTPNotFound: raise ProjectNotFound(project_id=project_id) + except HTTPConflict as e: + raise NeptuneObjectCreationConflict() from e @with_api_exceptions_handler def create_checkpoint(self, notebook_id: str, jupyter_path: str) -> Optional[str]: @@ -371,11 +445,11 @@ def ping(self, container_id: str, container_type: ContainerType): **request_kwargs, ).response().result except HTTPNotFound as e: - raise_container_not_found(container_id, container_type, from_exception=e) + raise ContainerUUIDNotFound(container_id, container_type) from e def execute_operations( self, - container_id: str, + container_id: UniqueId, container_type: ContainerType, operations: List[Operation], ) -> Tuple[int, List[NeptuneException]]: @@ -558,7 +632,7 @@ def _execute_artifact_operations( @with_api_exceptions_handler def _execute_operations( self, - container_id: str, + container_id: UniqueId, container_type: ContainerType, operations: List[Operation], ) -> List[MetadataInconsistency]: @@ -584,7 +658,7 @@ def _execute_operations( ) return [MetadataInconsistency(err.errorDescription) for err in result] except HTTPNotFound as e: - raise_container_not_found(container_id, container_type, from_exception=e) + raise ContainerUUIDNotFound(container_id, container_type) from e except (HTTPPaymentRequired, HTTPUnprocessableEntity) as e: raise NeptuneLimitExceedException( reason=e.response.json().get("title", "Unknown reason") @@ -602,7 +676,7 @@ def to_attribute(attr) -> Attribute: **DEFAULT_REQUEST_KWARGS, } try: - run = ( + experiment = ( self.leaderboard_client.api.getExperimentAttributes(**params) .response() .result @@ -610,11 +684,13 @@ def to_attribute(attr) -> Attribute: attribute_type_names = [at.value for at in AttributeType] accepted_attributes = [ - attr for attr in 
run.attributes if attr.type in attribute_type_names + attr + for attr in experiment.attributes + if attr.type in attribute_type_names ] # Notify about ignored attrs - ignored_attributes = set(attr.type for attr in run.attributes) - set( + ignored_attributes = set(attr.type for attr in experiment.attributes) - set( attr.type for attr in accepted_attributes ) if ignored_attributes: @@ -630,11 +706,10 @@ def to_attribute(attr) -> Attribute: if attr.type in attribute_type_names ] except HTTPNotFound as e: - raise_container_not_found( + raise ContainerUUIDNotFound( container_id=container_id, container_type=container_type, - from_exception=e, - ) + ) from e def download_file_series_by_index( self, @@ -1014,7 +1089,7 @@ def fetch_atom_attribute_values( if attr.name.startswith(namespace_prefix) ] except HTTPNotFound as e: - raise_container_not_found(container_id, container_type, from_exception=e) + raise ContainerUUIDNotFound(container_id, container_type) from e # pylint: disable=unused-argument @with_api_exceptions_handler @@ -1038,28 +1113,27 @@ def _get_file_set_download_request( raise FetchAttributeNotFoundException(path_to_str(path)) @with_api_exceptions_handler - def get_leaderboard( + def search_leaderboard_entries( self, - project_id: str, - _id: Optional[Iterable[str]] = None, - state: Optional[Iterable[str]] = None, - owner: Optional[Iterable[str]] = None, - tags: Optional[Iterable[str]] = None, + project_id: UniqueId, + types: Optional[Iterable[ContainerType]] = None, + query: Optional[NQLQuery] = None, ) -> List[LeaderboardEntry]: + query_params = {} + if query: + query_params = {"query": {"query": str(query)}} + def get_portion(limit, offset): return ( - self.leaderboard_client.api.getLeaderboard( + self.leaderboard_client.api.searchLeaderboardEntries( projectIdentifier=project_id, - shortId=_id, - state=state, - owner=owner, - tags=tags, - tagsMode="and", - sortBy=["shortId"], - sortFieldType=["string"], - sortDirection=["ascending"], - limit=limit, - 
def get_model_version_url(
    self,
    model_version_id: str,
    model_id: str,
    workspace: str,
    project_name: str,
    sys_id: str,
) -> str:
    """Build the display URL of a model version.

    The URL has the form
    ``<display address>/<workspace>/<project>/m/<model_id>/v/<sys_id>``.
    ``model_version_id`` is accepted but does not appear in the URL.
    """
    return (
        f"{self.get_display_address()}/{workspace}/{project_name}"
        f"/m/{model_id}/v/{sys_id}"
    )
@abc.abstractmethod
def get_metadata_container(
    self,
    container_id: Union[UniqueId, QualifiedName],
    expected_container_type: Optional[ContainerType],
) -> ApiExperiment:
    """Return the container identified by ``container_id``.

    Args:
        container_id: Unique id or qualified name of the container.
        expected_container_type: Type the caller expects, or ``None``.
            Declared as optional to match the hosted backend's signature.

    Returns:
        An ``ApiExperiment`` describing the container.
    """
@abc.abstractmethod
def get_model_version_url(
    self,
    model_version_id: str,
    model_id: str,
    workspace: str,
    project_name: str,
    sys_id: str,
) -> str:
    """Return the URL under which a model version can be viewed.

    Abstract: concrete backends supply the implementation.
    """
    return None
+26,13 @@ MetadataInconsistency, NeptuneException, RunNotFound, - raise_container_not_found, + ModelVersionNotFound, + ProjectNotFound, + ContainerUUIDNotFound, ) from neptune.new.internal.artifacts.types import ArtifactFileData from neptune.new.internal.backends.api_model import ( - ApiRun, + ApiExperiment, ArtifactAttribute, Attribute, AttributeType, @@ -56,6 +59,7 @@ ) from neptune.new.internal.backends.neptune_backend import NeptuneBackend from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.id_formats import SysId, QualifiedName, UniqueId from neptune.new.internal.operation import ( AddStrings, AssignArtifact, @@ -84,10 +88,11 @@ UploadFileSet, ) from neptune.new.internal.operation_visitor import OperationVisitor -from neptune.new.internal.run_structure import ContainerStructure +from neptune.new.internal.container_structure import ContainerStructure from neptune.new.internal.utils import base64_decode from neptune.new.internal.utils.generic_attribute_mapper import NoValue from neptune.new.internal.utils.paths import path_to_str +from neptune.new.metadata_containers import Model from neptune.new.types import Boolean, Integer from neptune.new.types.atoms import GitRef from neptune.new.types.atoms.artifact import Artifact @@ -103,6 +108,7 @@ from neptune.new.types.sets.string_set import StringSet from neptune.new.types.value import Value from neptune.new.types.value_visitor import ValueVisitor +from neptune.new.internal.backends.nql import NQLQuery Val = TypeVar("Val", bound=Value) @@ -110,15 +116,17 @@ class NeptuneBackendMock(NeptuneBackend): WORKSPACE_NAME = "offline" PROJECT_NAME = "project-placeholder" - PROJECT_KEY = "OFFLINE" + PROJECT_KEY = SysId("OFFLINE") + MODEL_SYS_ID = SysId("OFFLINE-MOD") def __init__(self, credentials=None, proxies=None): # pylint: disable=unused-argument - self._project_id: str = str(uuid.uuid4()) + self._project_id: UniqueId = UniqueId(str(uuid.uuid4())) self._containers: Dict[ - (str, 
def get_available_projects(
    self, workspace_id: Optional[str] = None, search_term: Optional[str] = None
) -> List[Project]:
    """Return a one-element list holding the placeholder offline project.

    Both arguments are ignored. A fresh random id is generated on every call.
    """
    placeholder_project = Project(
        id=UniqueId(str(uuid.uuid4())),
        name=self.PROJECT_NAME,
        workspace=self.WORKSPACE_NAME,
        sys_id=self.PROJECT_KEY,
    )
    return [placeholder_project]
def create_model_version(
    self, project_id: str, model_id: UniqueId
) -> ApiExperiment:
    """Create an in-memory model-version container for ``model_id``.

    Args:
        project_id: Accepted for interface compatibility; not used here.
        model_id: Id of the parent model. It selects the per-model version
            counter and is used to look up the model's key.

    Returns:
        An ``ApiExperiment`` of type ``ContainerType.MODEL_VERSION``
        describing the newly registered container.
    """
    try:
        # NOTE(review): other call sites in this class address attributes
        # with list paths (e.g. ["sys", "id"]); confirm that the container
        # structure's ``get`` accepts the "sys/id" string form.
        model_key = self._get_container(
            container_id=model_id, container_type=ContainerType.MODEL
        ).get("sys/id")
    except ContainerUUIDNotFound:
        # The parent model is unknown to the mock: fall back to a fixed key.
        model_key = "MOD"

    sys_id = SysId(
        f"{self.PROJECT_KEY}-{model_key}-{self._next_model_version[model_id]}"
    )
    self._next_model_version[model_id] += 1
    new_run_id = UniqueId(str(uuid.uuid4()))
    self._create_container(new_run_id, ContainerType.MODEL_VERSION, sys_id=sys_id)
    return ApiExperiment(
        id=new_run_id,
        # Must match the type the container was registered under above;
        # the original reported ContainerType.MODEL here.
        type=ContainerType.MODEL_VERSION,
        sys_id=sys_id,
        workspace=self.WORKSPACE_NAME,
        project_name=self.PROJECT_NAME,
        trashed=False,
    )
def get_model_version_url(
    self,
    model_version_id: str,
    model_id: str,
    workspace: str,
    project_name: str,
    sys_id: str,
) -> str:
    """Return the mock URL of a model version: ``offline/<model_version_id>``.

    All arguments other than ``model_version_id`` are ignored.
    """
    return "offline/" + format(model_version_id)
__all__ = [
    "NQLQuery",
    "NQLAggregator",
    "NQLQueryAggregate",
    "NQLAttributeOperator",
    "NQLAttributeType",
    "NQLQueryAttribute",
]

from enum import Enum
from typing import Iterable
from dataclasses import dataclass


@dataclass
class NQLQuery:
    """Base type for every node of an NQL query tree."""


class NQLAggregator(str, Enum):
    """Keyword used to combine the sub-queries of an aggregate."""

    AND = "AND"
    OR = "OR"


@dataclass
class NQLQueryAggregate(NQLQuery):
    """Several sub-queries joined with one aggregator keyword."""

    # Sub-queries to combine; any iterable is accepted (list, tuple, generator).
    items: Iterable[NQLQuery]
    # Keyword placed between the rendered sub-queries.
    aggregator: NQLAggregator

    def __str__(self) -> str:
        """Render as ``(<item> <AGG> <item> ...)``; ``""`` when there are no items."""
        if not self.items:
            return ""
        # Materialise first: an empty generator or iterator is truthy, so
        # testing the iterable alone would render it as "()".
        rendered = [str(item) for item in self.items]
        if not rendered:
            return ""
        return "(" + f" {self.aggregator.value} ".join(rendered) + ")"


class NQLAttributeOperator(str, Enum):
    """Comparison operators usable in an attribute condition."""

    EQUALS = "="
    CONTAINS = "CONTAINS"


class NQLAttributeType(str, Enum):
    """Attribute type names written into the rendered query."""

    STRING = "string"
    STRING_SET = "stringSet"
    EXPERIMENT_STATE = "experimentState"


@dataclass
class NQLQueryAttribute(NQLQuery):
    """A single ``attribute <operator> value`` condition."""

    name: str
    type: NQLAttributeType
    operator: NQLAttributeOperator
    value: str

    def __str__(self) -> str:
        """Render as ``(`name`:type OPERATOR "value")``."""
        # NOTE(review): ``value`` is embedded verbatim; a double quote inside
        # it is not escaped. Confirm how callers are expected to handle that.
        return f'(`{self.name}`:{self.type.value} {self.operator.value} "{self.value}")'
from neptune.new.internal.utils import verify_type from neptune.new.internal.backends.api_model import Project -from neptune.new.version import version as parsed_version - -__version__ = str(parsed_version) _logger = logging.getLogger(__name__) -def project_name_lookup(backend: NeptuneBackend, name: Optional[str] = None) -> Project: +def project_name_lookup( + backend: NeptuneBackend, name: Optional[QualifiedName] = None +) -> Project: verify_type("name", name, (str, type(None))) if not name: diff --git a/neptune/new/internal/backgroud_job_list.py b/neptune/new/internal/backgroud_job_list.py index 206a7edff..78708dea1 100644 --- a/neptune/new/internal/backgroud_job_list.py +++ b/neptune/new/internal/backgroud_job_list.py @@ -19,16 +19,16 @@ from neptune.new.internal.background_job import BackgroundJob if TYPE_CHECKING: - from neptune.new.run import Run + from neptune.new.metadata_containers import MetadataContainer class BackgroundJobList(BackgroundJob): def __init__(self, jobs: List[BackgroundJob]): self._jobs = jobs - def start(self, run: "Run"): + def start(self, container: "MetadataContainer"): for job in self._jobs: - job.start(run) + job.start(container) def stop(self): for job in self._jobs: diff --git a/neptune/new/internal/background_job.py b/neptune/new/internal/background_job.py index 6ecc24626..5fa2bb757 100644 --- a/neptune/new/internal/background_job.py +++ b/neptune/new/internal/background_job.py @@ -19,12 +19,12 @@ from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: - from neptune.new.run import Run + from neptune.new.metadata_containers import MetadataContainer class BackgroundJob: @abc.abstractmethod - def start(self, run: "Run"): + def start(self, container: "MetadataContainer"): pass @abc.abstractmethod diff --git a/neptune/new/internal/run_structure.py b/neptune/new/internal/container_structure.py similarity index 100% rename from neptune/new/internal/run_structure.py rename to neptune/new/internal/container_structure.py diff --git 
import enum


class ContainerType(str, enum.Enum):
    """Kinds of metadata containers, with conversion to and from API names."""

    RUN = "run"
    PROJECT = "project"
    MODEL = "model"
    MODEL_VERSION = "model_version"

    def to_api(self) -> str:
        """Return the name the API uses for this container type.

        Only ``MODEL_VERSION`` differs from its enum value: it maps to
        ``"modelVersion"``.
        """
        return "modelVersion" if self is ContainerType.MODEL_VERSION else self.value

    @staticmethod
    def from_api(api_type: str) -> "ContainerType":
        """Return the member for an API type name (inverse of ``to_api``).

        Raises:
            ValueError: If ``api_type`` matches no member.
        """
        if api_type != "modelVersion":
            return ContainerType(api_type)
        return ContainerType.MODEL_VERSION
- - @abc.abstractmethod - def put(self, obj: T) -> int: - pass - - @abc.abstractmethod - def get(self) -> Tuple[Optional[T], int]: - pass - - @abc.abstractmethod - def get_batch(self, size: int) -> Tuple[Optional[List[T]], int]: - pass - - @abc.abstractmethod - def flush(self) -> None: - pass - - @abc.abstractmethod - def close(self): - pass - - @abc.abstractmethod - def wait_for_empty(self, seconds: Optional[float] = None) -> bool: - pass - - @abc.abstractmethod - def ack(self, version: int) -> None: - pass - - @abc.abstractmethod - def is_empty(self) -> bool: - pass - - @abc.abstractmethod - def size(self) -> int: - pass diff --git a/neptune/new/internal/containers/disk_queue.py b/neptune/new/internal/disk_queue.py similarity index 92% rename from neptune/new/internal/containers/disk_queue.py rename to neptune/new/internal/disk_queue.py index 2015dd36c..e02153ce3 100644 --- a/neptune/new/internal/containers/disk_queue.py +++ b/neptune/new/internal/disk_queue.py @@ -19,21 +19,20 @@ import threading from glob import glob from pathlib import Path -from typing import Callable, List, Optional, Tuple, TypeVar +from typing import Callable, Generic, List, Optional, Tuple, TypeVar from neptune.new.exceptions import MalformedOperation -from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.containers.storage_queue import StorageQueue -from neptune.new.internal.utils.container_type_file import ContainerTypeFile from neptune.new.internal.utils.json_file_splitter import JsonFileSplitter from neptune.new.internal.utils.sync_offset_file import SyncOffsetFile +__all__ = ["DiskQueue"] + T = TypeVar("T") _logger = logging.getLogger(__name__) -class DiskQueue(StorageQueue[T]): +class DiskQueue(Generic[T]): # NOTICE: This class is thread-safe as long as there is only one consumer and one producer. 
@@ -43,7 +42,6 @@ def __init__( to_dict: Callable[[T], dict], from_dict: Callable[[dict], T], lock: threading.RLock, - container_type: ContainerType = None, max_file_size: int = 64 * 1024 ** 2, ): self._dir_path = dir_path.resolve() @@ -56,12 +54,6 @@ def __init__( except FileExistsError: pass - # save information regarding container type in queue directory - container_file_type = ContainerTypeFile( - dir_path, expected_container_type=container_type - ) - container_file_type.save() - self._last_ack_file = SyncOffsetFile(dir_path / "last_ack_version", default=0) self._last_put_file = SyncOffsetFile(dir_path / "last_put_version", default=0) diff --git a/neptune/new/internal/hardware/hardware_metric_reporting_job.py b/neptune/new/internal/hardware/hardware_metric_reporting_job.py index c1828f778..79b77ab1b 100644 --- a/neptune/new/internal/hardware/hardware_metric_reporting_job.py +++ b/neptune/new/internal/hardware/hardware_metric_reporting_job.py @@ -41,7 +41,7 @@ from neptune.new.internal.threading.daemon import Daemon if TYPE_CHECKING: - from neptune.new.run import Run + from neptune.new.metadata_containers import MetadataContainer _logger = logging.getLogger(__name__) @@ -54,7 +54,7 @@ def __init__(self, period: float = 10, attribute_namespace: str = "monitoring"): self._gauges_in_resource: Dict[str, int] = dict() self._attribute_namespace = attribute_namespace - def start(self, run: "Run"): + def start(self, container: "MetadataContainer"): gauge_mode = GaugeMode.CGROUP if in_docker() else GaugeMode.SYSTEM system_resource_info = SystemResourceInfoFactory( system_monitor=SystemMonitor(), @@ -76,12 +76,14 @@ def start(self, run: "Run"): for metric in metrics_container.metrics(): for gauge in metric.gauges: path = self.get_attribute_name(metric.resource_type, gauge.name()) - if not run.get_attribute(path): - run[path] = FloatSeries( + if not container.get_attribute(path): + container[path] = FloatSeries( [], min=metric.min_value, max=metric.max_value, 
unit=metric.unit ) - self._thread = self.ReportingThread(self, self._period, run, metric_reporter) + self._thread = self.ReportingThread( + self, self._period, container, metric_reporter + ) self._thread.start() self._started = True @@ -108,12 +110,12 @@ def __init__( self, outer: "HardwareMetricReportingJob", period: float, - run: "Run", + container: "MetadataContainer", metric_reporter: MetricReporter, ): super().__init__(sleep_time=period, name="NeptuneReporting") self._outer = outer - self._run = run + self._container = container self._metric_reporter = metric_reporter def work(self) -> None: @@ -122,7 +124,7 @@ def work(self) -> None: for gauge_name, metric_values in groupby( report.values, lambda value: value.gauge_name ): - attr = self._run[ + attr = self._container[ self._outer.get_attribute_name( report.metric.resource_type, gauge_name ) diff --git a/neptune/new/internal/id_formats.py b/neptune/new/internal/id_formats.py new file mode 100644 index 000000000..b9d031de2 --- /dev/null +++ b/neptune/new/internal/id_formats.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import typing +from typing import NewType + +UniqueId = NewType("UniqueId", str) + +SysId = NewType("SysId", str) + +QualifiedName = NewType("QualifiedName", str) + + +def conform_optional(value: typing.Optional[str], cls): + return cls(value) if value is not None else None diff --git a/neptune/new/internal/init_impl.py b/neptune/new/internal/init/__init__.py similarity index 73% rename from neptune/new/internal/init_impl.py rename to neptune/new/internal/init/__init__.py index de6b81b5d..1114ff026 100644 --- a/neptune/new/internal/init_impl.py +++ b/neptune/new/internal/init/__init__.py @@ -13,10 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from neptune.new.internal.init_project import get_project, init_project -from neptune.new.internal.init_run import __version__, init_run +from neptune.new.internal.init.project import get_project, init_project +from neptune.new.internal.init.run import init_run +from neptune.new.internal.init.model import init_model +from neptune.new.internal.init.model_version import init_model_version from neptune.new.types.mode import Mode init = init_run RunMode = Mode -__all__ = ["__version__", "get_project", "init_project", "init_run", "init", "RunMode"] diff --git a/neptune/new/internal/init/model.py b/neptune/new/internal/init/model.py new file mode 100644 index 000000000..d06296b86 --- /dev/null +++ b/neptune/new/internal/init/model.py @@ -0,0 +1,144 @@ +# +# Copyright (c) 2021, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import threading +from typing import Optional + +from neptune.new.attributes import constants as attr_consts +from neptune.new.exceptions import ( + NeedExistingModelForReadOnlyMode, + NeptuneException, + NeptuneMissingRequiredInitParameter, + NeptuneObjectCreationConflict, + NeptuneModelKeyAlreadyExistsError, +) +from neptune.new.internal import id_formats +from neptune.new.internal.backends.factory import get_backend +from neptune.new.internal.backends.project_name_lookup import project_name_lookup +from neptune.new.internal.backgroud_job_list import BackgroundJobList +from neptune.new.internal.id_formats import QualifiedName +from neptune.new.internal.init.parameters import ( + DEFAULT_FLUSH_PERIOD, + DEFAULT_NAME, + OFFLINE_PROJECT_QUALIFIED_NAME, +) +from neptune.new.internal.operation_processors.factory import get_operation_processor +from neptune.new.internal.utils import verify_type +from neptune.new.internal.utils.ping_background_job import PingBackgroundJob +from neptune.new.metadata_containers import Model +from neptune.new.types.mode import Mode + + +def init_model( + *, + model: Optional[str] = None, + name: Optional[str] = None, + key: Optional[str] = None, + project: Optional[str] = None, + api_token: Optional[str] = None, + mode: str = Mode.ASYNC.value, + flush_period: float = DEFAULT_FLUSH_PERIOD, + proxies: Optional[dict] = None, +) -> Model: + verify_type("model", model, (str, type(None))) + verify_type("name", name, (str, type(None))) + verify_type("key", key, (str, type(None))) + verify_type("project", project, (str, type(None))) + verify_type("api_token", api_token, (str, type(None))) + verify_type("mode", mode, str) + verify_type("flush_period", flush_period, (int, float)) + verify_type("proxies", proxies, (dict, type(None))) + # make mode proper Enum instead of string + mode = Mode(mode) + + if mode == Mode.OFFLINE: + raise 
NeptuneException("Model can't be initialized in OFFLINE mode") + + name = DEFAULT_NAME if model is None and name is None else name + + backend = get_backend(mode=mode, api_token=api_token, proxies=proxies) + + if mode == Mode.OFFLINE or mode == Mode.DEBUG: + project = OFFLINE_PROJECT_QUALIFIED_NAME + + project = id_formats.conform_optional(project, QualifiedName) + project_obj = project_name_lookup(backend=backend, name=project) + project = f"{project_obj.workspace}/{project_obj.name}" + + if model is not None: + # model (resume existing model) has priority over key (creating a new model) + # additional creation parameters (e.g. name) are simply ignored in this scenario + model = QualifiedName(project + "/" + model) + api_model = backend.get_metadata_container( + container_id=model, expected_container_type=Model.container_type + ) + elif key is not None: + if mode == Mode.READ_ONLY: + raise NeedExistingModelForReadOnlyMode() + + try: + api_model = backend.create_model(project_id=project_obj.id, key=key) + except NeptuneObjectCreationConflict as e: + base_url = backend.get_display_address() + raise NeptuneModelKeyAlreadyExistsError( + model_key=key, + models_tab_url=f"{base_url}/{project_obj.workspace}/{project_obj.name}/models", + ) from e + + else: + raise NeptuneMissingRequiredInitParameter( + parameter_name="key", + called_function="init_model", + ) + + model_lock = threading.RLock() + + operation_processor = get_operation_processor( + mode=mode, + container_id=api_model.id, + container_type=Model.container_type, + backend=backend, + lock=model_lock, + flush_period=flush_period, + ) + + background_jobs = [] + if mode != Mode.READ_ONLY: + background_jobs.append(PingBackgroundJob()) + + _model = Model( + id_=api_model.id, + backend=backend, + op_processor=operation_processor, + background_job=BackgroundJobList(background_jobs), + lock=model_lock, + workspace=api_model.workspace, + project_name=api_model.project_name, + sys_id=api_model.sys_id, + 
project_id=project_obj.id, + ) + + if mode != Mode.OFFLINE: + _model.sync(wait=False) + + if mode != Mode.READ_ONLY: + if name is not None: + _model[attr_consts.SYSTEM_NAME_ATTRIBUTE_PATH] = name + + # pylint: disable=protected-access + _model._startup(debug_mode=mode == Mode.DEBUG) + + return _model diff --git a/neptune/new/internal/init/model_version.py b/neptune/new/internal/init/model_version.py new file mode 100644 index 000000000..90ce06774 --- /dev/null +++ b/neptune/new/internal/init/model_version.py @@ -0,0 +1,138 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import threading +from typing import Optional + +from neptune.new.attributes import constants as attr_consts +from neptune.new.exceptions import ( + NeedExistingModelVersionForReadOnlyMode, + NeptuneException, + NeptuneMissingRequiredInitParameter, +) +from neptune.new.internal import id_formats +from neptune.new.internal.backends.factory import get_backend +from neptune.new.internal.backends.project_name_lookup import project_name_lookup +from neptune.new.internal.backgroud_job_list import BackgroundJobList +from neptune.new.internal.id_formats import QualifiedName +from neptune.new.internal.init.parameters import ( + DEFAULT_FLUSH_PERIOD, + DEFAULT_NAME, + OFFLINE_PROJECT_QUALIFIED_NAME, +) +from neptune.new.internal.operation_processors.factory import get_operation_processor +from neptune.new.internal.utils import verify_type +from neptune.new.internal.utils.ping_background_job import PingBackgroundJob +from neptune.new.metadata_containers import ModelVersion, Model +from neptune.new.types.mode import Mode + + +def init_model_version( + *, + version: Optional[str] = None, + name: Optional[str] = None, + model: Optional[str] = None, + project: Optional[str] = None, + api_token: Optional[str] = None, + mode: str = Mode.ASYNC.value, + flush_period: float = DEFAULT_FLUSH_PERIOD, + proxies: Optional[dict] = None, +) -> ModelVersion: + verify_type("version", version, (str, type(None))) + verify_type("name", name, (str, type(None))) + verify_type("model", model, (str, type(None))) + verify_type("project", project, (str, type(None))) + verify_type("api_token", api_token, (str, type(None))) + verify_type("mode", mode, str) + verify_type("flush_period", flush_period, (int, float)) + verify_type("proxies", proxies, (dict, type(None))) + # make mode proper Enum instead of string + mode = Mode(mode) + + if mode == Mode.OFFLINE: + raise NeptuneException("Model can't be initialized in OFFLINE mode") + + name = DEFAULT_NAME if model is None and name is None else name + + 
backend = get_backend(mode=mode, api_token=api_token, proxies=proxies) + + if mode == Mode.OFFLINE or mode == Mode.DEBUG: + project = OFFLINE_PROJECT_QUALIFIED_NAME + + project = id_formats.conform_optional(project, QualifiedName) + project_obj = project_name_lookup(backend, project) + project = f"{project_obj.workspace}/{project_obj.name}" + + if version is not None: + # version (resume existing model_version) has priority over model (creating a new model_version) + version = QualifiedName(project + "/" + version) + api_model_version = backend.get_metadata_container( + container_id=version, expected_container_type=ModelVersion.container_type + ) + elif model is not None: + if mode == Mode.READ_ONLY: + raise NeedExistingModelVersionForReadOnlyMode() + + model_id = QualifiedName(project + "/" + model) + api_model = backend.get_metadata_container( + container_id=model_id, expected_container_type=Model.container_type + ) + api_model_version = backend.create_model_version( + project_id=project_obj.id, model_id=api_model.id + ) + else: + raise NeptuneMissingRequiredInitParameter( + parameter_name="model", + called_function="init_model_version", + ) + + model_lock = threading.RLock() + + operation_processor = get_operation_processor( + mode=mode, + container_id=api_model_version.id, + container_type=ModelVersion.container_type, + backend=backend, + lock=model_lock, + flush_period=flush_period, + ) + + background_jobs = [] + if mode != Mode.READ_ONLY: + background_jobs.append(PingBackgroundJob()) + + _model_version = ModelVersion( + id_=api_model_version.id, + backend=backend, + op_processor=operation_processor, + background_job=BackgroundJobList(background_jobs), + lock=model_lock, + workspace=api_model_version.workspace, + project_name=api_model_version.project_name, + sys_id=api_model_version.sys_id, + project_id=project_obj.id, + ) + if mode != Mode.OFFLINE: + _model_version.sync(wait=False) + + if mode != Mode.READ_ONLY: + if name is not None: + 
_model_version[attr_consts.SYSTEM_NAME_ATTRIBUTE_PATH] = name + + # pylint: disable=protected-access + _model_version._startup(debug_mode=mode == Mode.DEBUG) + + return _model_version diff --git a/neptune/new/internal/init/parameters.py b/neptune/new/internal/init/parameters.py new file mode 100644 index 000000000..1c0584e32 --- /dev/null +++ b/neptune/new/internal/init/parameters.py @@ -0,0 +1,19 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +DEFAULT_FLUSH_PERIOD = 5 +DEFAULT_NAME = "Untitled" +OFFLINE_PROJECT_QUALIFIED_NAME = "offline/project-placeholder" diff --git a/neptune/new/internal/init_project.py b/neptune/new/internal/init/project.py similarity index 80% rename from neptune/new/internal/init_project.py rename to neptune/new/internal/init/project.py index 94b2dc094..7633508ec 100644 --- a/neptune/new/internal/init_project.py +++ b/neptune/new/internal/init/project.py @@ -13,23 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import logging import threading from typing import Optional from neptune.new.exceptions import NeptuneException +from neptune.new.internal import id_formats from neptune.new.internal.backends.factory import get_backend from neptune.new.internal.backends.project_name_lookup import project_name_lookup from neptune.new.internal.backgroud_job_list import BackgroundJobList +from neptune.new.internal.id_formats import QualifiedName +from neptune.new.internal.init.parameters import DEFAULT_FLUSH_PERIOD from neptune.new.internal.operation_processors.factory import get_operation_processor from neptune.new.internal.utils import verify_type -from neptune.new.project import Project +from neptune.new.metadata_containers import Project from neptune.new.types.mode import Mode -from neptune.new.version import version as parsed_version - -__version__ = str(parsed_version) - -_logger = logging.getLogger(__name__) def init_project( @@ -37,7 +34,7 @@ def init_project( name: Optional[str] = None, api_token: Optional[str] = None, mode: str = Mode.ASYNC.value, - flush_period: float = 5, + flush_period: float = DEFAULT_FLUSH_PERIOD, proxies: Optional[dict] = None, ) -> Project: verify_type("name", name, (str, type(None))) @@ -45,17 +42,20 @@ def init_project( verify_type("mode", mode, str) verify_type("flush_period", flush_period, (int, float)) verify_type("proxies", proxies, (dict, type(None))) + # make mode proper Enum instead of string + mode = Mode(mode) if mode == Mode.OFFLINE: raise NeptuneException("Project can't be initialized in OFFLINE mode") - backend = get_backend(mode, api_token=api_token, proxies=proxies) - project_obj = project_name_lookup(backend, name) + name = id_formats.conform_optional(name, QualifiedName) + backend = get_backend(mode=mode, api_token=api_token, proxies=proxies) + project_obj = project_name_lookup(backend=backend, name=name) project_lock = threading.RLock() operation_processor = get_operation_processor( - mode, + mode=mode, 
container_id=project_obj.id, container_type=Project.container_type, backend=backend, @@ -66,14 +66,16 @@ def init_project( background_jobs = [] project = Project( - project_obj.id, - backend, - operation_processor, - BackgroundJobList(background_jobs), - project_lock, - project_obj.workspace, - project_obj.name, + id_=project_obj.id, + backend=backend, + op_processor=operation_processor, + background_job=BackgroundJobList(background_jobs), + lock=project_lock, + workspace=project_obj.workspace, + project_name=project_obj.name, + sys_id=project_obj.sys_id, ) + if mode != Mode.OFFLINE: project.sync(wait=False) diff --git a/neptune/new/internal/init_run.py b/neptune/new/internal/init/run.py similarity index 90% rename from neptune/new/internal/init_run.py rename to neptune/new/internal/init/run.py index deecb9b4f..519a85a03 100644 --- a/neptune/new/internal/init_run.py +++ b/neptune/new/internal/init/run.py @@ -14,9 +14,9 @@ # limitations under the License. # -import logging import os import threading +import typing from platform import node as get_hostname from typing import List, Optional, Union @@ -32,6 +32,7 @@ NeptunePossibleLegacyUsageException, NeptuneRunResumeAndCustomIdCollision, ) +from neptune.new.internal import id_formats from neptune.new.internal.backends.factory import get_backend from neptune.new.internal.backends.neptune_backend import NeptuneBackend from neptune.new.internal.backends.project_name_lookup import project_name_lookup @@ -39,6 +40,12 @@ from neptune.new.internal.hardware.hardware_metric_reporting_job import ( HardwareMetricReportingJob, ) +from neptune.new.internal.id_formats import QualifiedName +from neptune.new.internal.init.parameters import ( + DEFAULT_FLUSH_PERIOD, + DEFAULT_NAME, + OFFLINE_PROJECT_QUALIFIED_NAME, +) from neptune.new.internal.notebooks.notebooks import create_checkpoint from neptune.new.internal.operation_processors.factory import get_operation_processor from neptune.new.internal.streams.std_capture_background_job 
import ( @@ -54,14 +61,9 @@ from neptune.new.internal.websockets.websocket_signals_background_job import ( WebsocketSignalsBackgroundJob, ) -from neptune.new.run import Run +from neptune.new.metadata_containers import Run from neptune.new.types.mode import Mode from neptune.new.types.series.string_series import StringSeries -from neptune.new.version import version as parsed_version - -__version__ = str(parsed_version) - -_logger = logging.getLogger(__name__) LEGACY_KWARGS = ("project_qualified_name", "backend") @@ -93,7 +95,7 @@ def init_run( capture_hardware_metrics: bool = True, fail_on_exception: bool = True, monitoring_namespace: Optional[str] = None, - flush_period: float = 5, + flush_period: float = DEFAULT_FLUSH_PERIOD, proxies: Optional[dict] = None, capture_traceback: bool = True, **kwargs, @@ -219,7 +221,10 @@ def init_run( else: verify_collection_type("source_files", source_files, str) - name = "Untitled" if run is None and name is None else name + # for backward compatibility imports + mode = Mode(mode) + + name = DEFAULT_NAME if run is None and name is None else name description = "" if run is None and description is None else description hostname = get_hostname() if run is None else None custom_run_id = custom_run_id or os.getenv(CUSTOM_RUN_ID_ENV_NAME) @@ -230,16 +235,20 @@ def init_run( if run and custom_run_id: raise NeptuneRunResumeAndCustomIdCollision() - backend = get_backend(mode, api_token=api_token, proxies=proxies) + backend = get_backend(mode=mode, api_token=api_token, proxies=proxies) if mode == Mode.OFFLINE or mode == Mode.DEBUG: - project = "offline/project-placeholder" + project = OFFLINE_PROJECT_QUALIFIED_NAME + project = id_formats.conform_optional(project, QualifiedName) project_obj = project_name_lookup(backend, project) project = f"{project_obj.workspace}/{project_obj.name}" if run: - api_run = backend.get_run(project + "/" + run) + api_run = backend.get_metadata_container( + container_id=QualifiedName(project + "/" + run), + 
expected_container_type=Run.container_type, + ) else: if mode == Mode.READ_ONLY: raise NeedExistingRunForReadOnlyMode() @@ -250,13 +259,17 @@ def init_run( notebook_id, checkpoint_id = _create_notebook_checkpoint(backend) api_run = backend.create_run( - project_obj.id, git_ref, custom_run_id, notebook_id, checkpoint_id + project_id=project_obj.id, + git_ref=git_ref, + custom_run_id=custom_run_id, + notebook_id=notebook_id, + checkpoint_id=checkpoint_id, ) run_lock = threading.RLock() operation_processor = get_operation_processor( - mode, + mode=mode, container_id=api_run.id, container_type=Run.container_type, backend=backend, @@ -290,16 +303,16 @@ def init_run( background_jobs.append(PingBackgroundJob()) _run = Run( - api_run.id, - backend, - operation_processor, - BackgroundJobList(background_jobs), - run_lock, - api_run.workspace, - api_run.project_name, - api_run.short_id, - project_obj.id, - monitoring_namespace, + id_=api_run.id, + backend=backend, + op_processor=operation_processor, + background_job=BackgroundJobList(background_jobs), + lock=run_lock, + workspace=api_run.workspace, + project_name=api_run.project_name, + sys_id=api_run.sys_id, + project_id=project_obj.id, + monitoring_namespace=monitoring_namespace, ) if mode != Mode.OFFLINE: _run.sync(wait=False) @@ -331,7 +344,9 @@ def init_run( return _run -def _create_notebook_checkpoint(backend: NeptuneBackend) -> (str, str): +def _create_notebook_checkpoint( + backend: NeptuneBackend, +) -> typing.Tuple[typing.Optional[str], typing.Optional[str]]: notebook_id = None if os.getenv(NEPTUNE_NOTEBOOK_ID, None) is not None: notebook_id = os.environ[NEPTUNE_NOTEBOOK_ID] diff --git a/neptune/new/internal/operation_processors/async_operation_processor.py b/neptune/new/internal/operation_processors/async_operation_processor.py index 93083ffe8..34f1bf285 100644 --- a/neptune/new/internal/operation_processors/async_operation_processor.py +++ b/neptune/new/internal/operation_processors/async_operation_processor.py @@ 
-23,8 +23,9 @@ import click from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.containers.storage_queue import StorageQueue +from neptune.new.internal.disk_queue import DiskQueue from neptune.new.internal.backends.neptune_backend import NeptuneBackend +from neptune.new.internal.id_formats import UniqueId from neptune.new.internal.operation import Operation from neptune.new.internal.operation_processors.operation_processor import ( OperationProcessor, @@ -42,9 +43,9 @@ class AsyncOperationProcessor(OperationProcessor): def __init__( self, - container_id: str, + container_id: UniqueId, container_type: ContainerType, - queue: StorageQueue[Operation], + queue: DiskQueue[Operation], backend: NeptuneBackend, lock: threading.RLock, sleep_time: float = 5, diff --git a/neptune/new/internal/operation_processors/factory.py b/neptune/new/internal/operation_processors/factory.py index 51a198d3f..3f5236711 100644 --- a/neptune/new/internal/operation_processors/factory.py +++ b/neptune/new/internal/operation_processors/factory.py @@ -14,6 +14,8 @@ # limitations under the License. 
# +__all__ = ["get_operation_processor"] + import os import threading from datetime import datetime @@ -25,29 +27,31 @@ OFFLINE_DIRECTORY, ) from neptune.new.internal.backends.neptune_backend import NeptuneBackend -from neptune.new.internal.containers.disk_queue import DiskQueue +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.disk_queue import DiskQueue +from neptune.new.internal.id_formats import UniqueId from neptune.new.internal.operation import Operation +from neptune.new.sync.utils import create_dir_name from neptune.new.types.mode import Mode from .async_operation_processor import AsyncOperationProcessor from .offline_operation_processor import OfflineOperationProcessor from .operation_processor import OperationProcessor from .read_only_operation_processor import ReadOnlyOperationProcessor from .sync_operation_processor import SyncOperationProcessor -from ..container_type import ContainerType def get_operation_processor( mode: Mode, - container_id: str, + container_id: UniqueId, container_type: ContainerType, backend: NeptuneBackend, lock: threading.RLock, flush_period: float, ) -> OperationProcessor: - if mode == Mode.ASYNC: - data_path = "{}/{}/{}".format( - NEPTUNE_DATA_DIRECTORY, ASYNC_DIRECTORY, container_id + data_path = ( + f"{NEPTUNE_DATA_DIRECTORY}/{ASYNC_DIRECTORY}" + f"/{create_dir_name(container_type, container_id)}" ) try: execution_id = len(os.listdir(data_path)) @@ -59,11 +63,10 @@ def get_operation_processor( container_id, container_type, DiskQueue( - Path(execution_path), - lambda x: x.to_dict(), - Operation.from_dict, - lock, - container_type, + dir_path=Path(execution_path), + to_dict=lambda x: x.to_dict(), + from_dict=Operation.from_dict, + lock=lock, ), backend, lock, @@ -75,15 +78,15 @@ def get_operation_processor( return SyncOperationProcessor(container_id, container_type, backend) elif mode == Mode.OFFLINE: # the object was returned by mocked backend and has some random ID. 
- data_path = "{}/{}/{}".format( - NEPTUNE_DATA_DIRECTORY, OFFLINE_DIRECTORY, container_id + data_path = ( + f"{NEPTUNE_DATA_DIRECTORY}/{OFFLINE_DIRECTORY}" + f"/{create_dir_name(container_type, container_id)}" ) storage_queue = DiskQueue( - Path(data_path), - lambda x: x.to_dict(), - Operation.from_dict, - lock, - container_type, + dir_path=Path(data_path), + to_dict=lambda x: x.to_dict(), + from_dict=Operation.from_dict, + lock=lock, ) return OfflineOperationProcessor(storage_queue) elif mode == Mode.READ_ONLY: diff --git a/neptune/new/internal/operation_processors/offline_operation_processor.py b/neptune/new/internal/operation_processors/offline_operation_processor.py index 392df8132..9eefe3f5a 100644 --- a/neptune/new/internal/operation_processors/offline_operation_processor.py +++ b/neptune/new/internal/operation_processors/offline_operation_processor.py @@ -15,7 +15,7 @@ # from typing import Optional -from neptune.new.internal.containers.storage_queue import StorageQueue +from neptune.new.internal.disk_queue import DiskQueue from neptune.new.internal.operation import Operation from neptune.new.internal.operation_processors.operation_processor import ( OperationProcessor, @@ -23,7 +23,7 @@ class OfflineOperationProcessor(OperationProcessor): - def __init__(self, queue: StorageQueue[Operation]): + def __init__(self, queue: DiskQueue[Operation]): self._queue = queue def enqueue_operation(self, op: Operation, wait: bool) -> None: diff --git a/neptune/new/internal/streams/std_capture_background_job.py b/neptune/new/internal/streams/std_capture_background_job.py index 4c33c5984..29fa4cb6a 100644 --- a/neptune/new/internal/streams/std_capture_background_job.py +++ b/neptune/new/internal/streams/std_capture_background_job.py @@ -27,7 +27,7 @@ ) if TYPE_CHECKING: - from neptune.new.run import Run + from neptune.new.metadata_containers import MetadataContainer class StdoutCaptureBackgroundJob(BackgroundJob): @@ -35,8 +35,8 @@ def __init__(self, attribute_name: str = 
MONITORING_STDOUT_ATTRIBUTE_PATH): self._attribute_name = attribute_name self._logger = None - def start(self, run: "Run"): - self._logger = StdoutCaptureLogger(run, self._attribute_name) + def start(self, container: "MetadataContainer"): + self._logger = StdoutCaptureLogger(container, self._attribute_name) def stop(self): self._logger.close() @@ -50,8 +50,8 @@ def __init__(self, attribute_name: str = MONITORING_STDERR_ATTRIBUTE_PATH): self._attribute_name = attribute_name self._logger = None - def start(self, run: "Run"): - self._logger = StderrCaptureLogger(run, self._attribute_name) + def start(self, container: "MetadataContainer"): + self._logger = StderrCaptureLogger(container, self._attribute_name) def stop(self): self._logger.close() diff --git a/neptune/new/internal/streams/std_stream_capture_logger.py b/neptune/new/internal/streams/std_stream_capture_logger.py index b4e664f92..eab56e536 100644 --- a/neptune/new/internal/streams/std_stream_capture_logger.py +++ b/neptune/new/internal/streams/std_stream_capture_logger.py @@ -19,13 +19,15 @@ from typing import TextIO -from neptune.new.run import Run +from neptune.new.metadata_containers import MetadataContainer from neptune.new.logging import Logger as NeptuneLogger class StdStreamCaptureLogger: - def __init__(self, run: Run, attribute_name: str, stream: TextIO): - self._logger = NeptuneLogger(run, attribute_name) + def __init__( + self, container: MetadataContainer, attribute_name: str, stream: TextIO + ): + self._logger = NeptuneLogger(container, attribute_name) self.stream = stream self._thread_local = threading.local() self.enabled = True @@ -50,8 +52,8 @@ def close(self): class StdoutCaptureLogger(StdStreamCaptureLogger): - def __init__(self, run: Run, attribute_name: str): - super().__init__(run, attribute_name, sys.stdout) + def __init__(self, container: MetadataContainer, attribute_name: str): + super().__init__(container, attribute_name, sys.stdout) sys.stdout = self def close(self): @@ -60,8 +62,8 
@@ def close(self): class StderrCaptureLogger(StdStreamCaptureLogger): - def __init__(self, run: Run, attribute_name: str): - super().__init__(run, attribute_name, sys.stderr) + def __init__(self, container: MetadataContainer, attribute_name: str): + super().__init__(container, attribute_name, sys.stderr) sys.stderr = self def close(self): diff --git a/neptune/new/internal/utils/__init__.py b/neptune/new/internal/utils/__init__.py index 0fca44fd0..bd8403a45 100644 --- a/neptune/new/internal/utils/__init__.py +++ b/neptune/new/internal/utils/__init__.py @@ -180,3 +180,15 @@ def is_ipython() -> bool: return ipython is not None except ImportError: return False + + +def as_list( + name: str, value: Optional[Union[str, Iterable[str]]] +) -> Optional[Iterable[str]]: + verify_type(name, value, (type(None), str, Iterable)) + if value is None: + return [] + if isinstance(value, str): + return [value] + verify_collection_type(name, value, str) + return value diff --git a/neptune/new/internal/utils/container_type_file.py b/neptune/new/internal/utils/container_type_file.py deleted file mode 100644 index 5bffb5638..000000000 --- a/neptune/new/internal/utils/container_type_file.py +++ /dev/null @@ -1,67 +0,0 @@ -# -# Copyright (c) 2021, Neptune Labs Sp. z o.o. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from pathlib import Path - -from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.exceptions import NeptuneInternalException - - -class ContainerTypeFile: - FILE_NAME = "container_type" - - def __init__(self, dir_path: Path, expected_container_type: ContainerType = None): - self._file = dir_path / self.FILE_NAME - self._container_type = self.check_container_type(expected_container_type) - - @property - def container_type(self) -> ContainerType: - return self._container_type - - def check_container_type( - self, expected_container_type: ContainerType - ) -> ContainerType: - """Make sure that queue will serve requested `default_container_type` - or analyze container_type based on information stored on disk.""" - container_type_from_file = None - if self._file.exists(): - with open(self._file, "r") as f: - container_type_from_file = ContainerType(f.read()) - - if container_type_from_file is None: - # No information about type stored on disk - if expected_container_type is None: - # When working with legacy data - # container_type couldn't be determined - # and for backward compatibility - # it should be Run at default - return ContainerType.RUN - return expected_container_type - else: - # Information about type is stored on disk - if ( - expected_container_type is not None - and container_type_from_file != expected_container_type - ): - raise NeptuneInternalException( - f"Expected container_type ({expected_container_type.value})" - f" doesn't match the one from file ({container_type_from_file.value})" - ) - return container_type_from_file - - def save(self): - """Saves information regarding container_type in queue directory""" - with open(self._file, "w") as f: - f.write(self._container_type.value) diff --git a/neptune/new/internal/utils/ping_background_job.py b/neptune/new/internal/utils/ping_background_job.py index 1bf39cedf..8e9083191 100644 --- a/neptune/new/internal/utils/ping_background_job.py +++ 
b/neptune/new/internal/utils/ping_background_job.py @@ -22,7 +22,7 @@ from neptune.new.internal.threading.daemon import Daemon if TYPE_CHECKING: - from neptune.new.run import Run + from neptune.new.metadata_containers import MetadataContainer _logger = logging.getLogger(__name__) @@ -33,8 +33,8 @@ def __init__(self, period: float = 10): self._thread = None self._started = False - def start(self, run: "Run"): - self._thread = self.ReportingThread(self._period, run) + def start(self, container: "MetadataContainer"): + self._thread = self.ReportingThread(self._period, container) self._thread.start() self._started = True @@ -49,9 +49,9 @@ def join(self, seconds: Optional[float] = None): self._thread.join(seconds) class ReportingThread(Daemon): - def __init__(self, period: float, run: "Run"): + def __init__(self, period: float, container: "MetadataContainer"): super().__init__(sleep_time=period, name="NeptunePing") - self._run = run + self._container = container @Daemon.ConnectionRetryWrapper( kill_message=( @@ -60,4 +60,4 @@ def __init__(self, period: float, run: "Run"): ) ) def work(self) -> None: - self._run.ping() + self._container.ping() diff --git a/neptune/new/internal/utils/traceback_job.py b/neptune/new/internal/utils/traceback_job.py index 11a5041b3..15b31c468 100644 --- a/neptune/new/internal/utils/traceback_job.py +++ b/neptune/new/internal/utils/traceback_job.py @@ -26,7 +26,7 @@ ) if TYPE_CHECKING: - from neptune.new.run import Run + from neptune.new.metadata_containers import MetadataContainer _logger = logging.getLogger(__name__) @@ -38,15 +38,15 @@ def __init__(self, path: str, fail_on_exception: bool = True): self._path = path self._fail_on_exception = fail_on_exception - def start(self, run: "Run"): + def start(self, container: "MetadataContainer"): if not self._started: path = self._path fail_on_exception = self._fail_on_exception def log_traceback(stacktrace_lines: List[str]): - run[path].log(stacktrace_lines) + container[path].log(stacktrace_lines) 
if fail_on_exception: - run[SYSTEM_FAILED_ATTRIBUTE_PATH] = True + container[SYSTEM_FAILED_ATTRIBUTE_PATH] = True traceback_handler.register(self._uuid, log_traceback) self._started = True diff --git a/neptune/new/internal/value_to_attribute_visitor.py b/neptune/new/internal/value_to_attribute_visitor.py index e68ef2264..9d5d62d6f 100644 --- a/neptune/new/internal/value_to_attribute_visitor.py +++ b/neptune/new/internal/value_to_attribute_visitor.py @@ -46,58 +46,58 @@ from neptune.new.types.value_visitor import ValueVisitor if TYPE_CHECKING: - from neptune.new import Run + from neptune.new import MetadataContainer class ValueToAttributeVisitor(ValueVisitor[Attribute]): - def __init__(self, run: "Run", path: List[str]): - self._run = run + def __init__(self, container: "MetadataContainer", path: List[str]): + self._container = container self._path = path def visit_float(self, _: Float) -> Attribute: - return FloatAttr(self._run, self._path) + return FloatAttr(self._container, self._path) def visit_integer(self, _: Integer) -> Attribute: - return IntegerAttr(self._run, self._path) + return IntegerAttr(self._container, self._path) def visit_boolean(self, _: Boolean) -> Attribute: - return BooleanAttr(self._run, self._path) + return BooleanAttr(self._container, self._path) def visit_string(self, _: String) -> Attribute: - return StringAttr(self._run, self._path) + return StringAttr(self._container, self._path) def visit_datetime(self, _: Datetime) -> Attribute: - return DatetimeAttr(self._run, self._path) + return DatetimeAttr(self._container, self._path) def visit_artifact(self, _: Artifact) -> Attribute: - return ArtifactAttr(self._run, self._path) + return ArtifactAttr(self._container, self._path) def visit_file(self, _: File) -> Attribute: - return FileAttr(self._run, self._path) + return FileAttr(self._container, self._path) def visit_file_set(self, _: FileSet) -> Attribute: - return FileSetAttr(self._run, self._path) + return FileSetAttr(self._container, 
self._path) def visit_float_series(self, _: FloatSeries) -> Attribute: - return FloatSeriesAttr(self._run, self._path) + return FloatSeriesAttr(self._container, self._path) def visit_string_series(self, _: StringSeries) -> Attribute: - return StringSeriesAttr(self._run, self._path) + return StringSeriesAttr(self._container, self._path) def visit_image_series(self, _: FileSeries) -> Attribute: - return ImageSeriesAttr(self._run, self._path) + return ImageSeriesAttr(self._container, self._path) def visit_string_set(self, _: StringSet) -> Attribute: - return StringSetAttr(self._run, self._path) + return StringSetAttr(self._container, self._path) def visit_git_ref(self, _: GitRef) -> Attribute: raise OperationNotSupported("Cannot create custom attribute of type GitRef") def visit_namespace(self, _: Namespace) -> Attribute: - return NamespaceAttr(self._run, self._path) + return NamespaceAttr(self._container, self._path) # pylint: disable=unused-argument def copy_value( self, source_type: Type[Attribute], source_path: List[str] ) -> Attribute: - return source_type(self._run, self._path) + return source_type(self._container, self._path) diff --git a/neptune/new/internal/websockets/websocket_signals_background_job.py b/neptune/new/internal/websockets/websocket_signals_background_job.py index 0bf1a5a38..cdf642616 100644 --- a/neptune/new/internal/websockets/websocket_signals_background_job.py +++ b/neptune/new/internal/websockets/websocket_signals_background_job.py @@ -36,7 +36,7 @@ from neptune.new.internal.websockets.websockets_factory import WebsocketsFactory if TYPE_CHECKING: - from neptune.new.run import Run + from neptune.new.metadata_containers import MetadataContainer _logger = logging.getLogger(__name__) @@ -47,8 +47,8 @@ def __init__(self, ws_factory: WebsocketsFactory): self._thread: "Optional[WebsocketSignalsBackgroundJob._ListenerThread]" = None self._started = False - def start(self, run: "Run"): - self._thread = self._ListenerThread(run, 
self._ws_factory.create()) + def start(self, container: "MetadataContainer"): + self._thread = self._ListenerThread(container, self._ws_factory.create()) self._thread.start() self._started = True @@ -66,9 +66,11 @@ def join(self, seconds: Optional[float] = None): self._thread.shutdown_ws_client() class _ListenerThread(Daemon): - def __init__(self, run: "Run", ws_client: ReconnectingWebsocket): + def __init__( + self, container: "MetadataContainer", ws_client: ReconnectingWebsocket + ): super().__init__(sleep_time=0, name="NeptuneWebhooks") - self._run = run + self._container = container self._ws_client = ws_client def work(self) -> None: @@ -112,10 +114,10 @@ def _handle_stop(self, msg_body): err=True, ) return - run_id = self._run["sys/id"].fetch() + run_id = self._container["sys/id"].fetch() click.echo(f"Run {run_id} received stop signal. Exiting", err=True) seconds = msg_body.get("seconds") - self._run.stop(seconds=seconds) + self._container.stop(seconds=seconds) process_killer.kill_me() def _handle_abort(self, msg_body): @@ -126,11 +128,11 @@ def _handle_abort(self, msg_body): err=True, ) return - run_id = self._run["sys/id"].fetch() + run_id = self._container["sys/id"].fetch() click.echo(f"Run {run_id} received abort signal. Exiting", err=True) seconds = msg_body.get("seconds") - self._run[SYSTEM_FAILED_ATTRIBUTE_PATH] = True - self._run.stop(seconds=seconds) + self._container[SYSTEM_FAILED_ATTRIBUTE_PATH] = True + self._container.stop(seconds=seconds) process_killer.kill_me() def shutdown_ws_client(self): diff --git a/neptune/new/logging/logger.py b/neptune/new/logging/logger.py index 72006eef5..4de410f26 100644 --- a/neptune/new/logging/logger.py +++ b/neptune/new/logging/logger.py @@ -14,13 +14,17 @@ # limitations under the License. 
# -from neptune.new.run import Run +from neptune.new.metadata_containers import MetadataContainer + +# backwards compatibility +# pylint: disable=unused-import +from neptune.new.metadata_containers import Run class Logger(object): - def __init__(self, run: Run, attribute_name: str): - self._run = run + def __init__(self, container: MetadataContainer, attribute_name: str): + self._container = container self._attribute_name = attribute_name def log(self, msg: str): - self._run[self._attribute_name].log(msg) + self._container[self._attribute_name].log(msg) diff --git a/neptune/new/metadata_containers/__init__.py b/neptune/new/metadata_containers/__init__.py new file mode 100644 index 000000000..cad9558b5 --- /dev/null +++ b/neptune/new/metadata_containers/__init__.py @@ -0,0 +1,21 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from neptune.new.metadata_containers.metadata_container import MetadataContainer +from neptune.new.metadata_containers.model import Model +from neptune.new.metadata_containers.model_version import ModelVersion +from neptune.new.metadata_containers.project import Project +from neptune.new.metadata_containers.run import Run diff --git a/neptune/new/attribute_container.py b/neptune/new/metadata_containers/metadata_container.py similarity index 85% rename from neptune/new/attribute_container.py rename to neptune/new/metadata_containers/metadata_container.py index 7c64a0906..00bc72a3e 100644 --- a/neptune/new/attribute_container.py +++ b/neptune/new/metadata_containers/metadata_container.py @@ -38,17 +38,21 @@ InactiveProjectException, InactiveRunException, NeptunePossibleLegacyUsageException, + InactiveModelException, + InactiveModelVersionException, ) from neptune.new.handler import Handler from neptune.new.internal.backends.api_model import AttributeType from neptune.new.internal.backends.neptune_backend import NeptuneBackend +from neptune.new.internal.backends.nql import NQLQuery from neptune.new.internal.background_job import BackgroundJob from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.id_formats import UniqueId, SysId from neptune.new.internal.operation import DeleteAttribute from neptune.new.internal.operation_processors.operation_processor import ( OperationProcessor, ) -from neptune.new.internal.run_structure import ContainerStructure +from neptune.new.internal.container_structure import ContainerStructure from neptune.new.internal.state import ContainerState from neptune.new.internal.utils import ( is_bool, @@ -74,38 +78,48 @@ from neptune.new.types.value import Value from neptune.new.types.value_copy import ValueCopy +from neptune.new.metadata_containers.metadata_containers_table import Table + def ensure_not_stopped(fun): @wraps(fun) - def inner_fun(self: "AttributeContainer", *args, **kwargs): + def 
inner_fun(self: "MetadataContainer", *args, **kwargs): # pylint: disable=protected-access if self._state == ContainerState.STOPPED: if self.container_type == ContainerType.RUN: raise InactiveRunException(label=self._label) elif self.container_type == ContainerType.PROJECT: raise InactiveProjectException(label=self._label) + elif self.container_type == ContainerType.MODEL: + raise InactiveModelException(label=self._label) + elif self.container_type == ContainerType.MODEL_VERSION: + raise InactiveModelVersionException(label=self._label) + else: + raise ValueError(f"Unknown container type: {self.container_type}") return fun(self, *args, **kwargs) return inner_fun -class AttributeContainer(AbstractContextManager): +class MetadataContainer(AbstractContextManager): container_type: ContainerType LEGACY_METHODS = set() def __init__( self, - _id: str, + *, + id_: UniqueId, backend: NeptuneBackend, op_processor: OperationProcessor, background_job: BackgroundJob, lock: threading.RLock, - project_id: str, + project_id: UniqueId, project_name: str, workspace: str, + sys_id: SysId, ): - self._id = _id + self._id = id_ self._project_id = project_id self._project_name = project_name self._workspace = workspace @@ -117,6 +131,7 @@ def __init__( ] = ContainerStructure(NamespaceBuilder(self)) self._lock = lock self._state = ContainerState.CREATED + self._sys_id = sys_id def __exit__(self, exc_type, exc_val, exc_tb): if exc_tb is not None: @@ -140,6 +155,11 @@ def _label(self) -> str: def _docs_url_stop(self) -> str: raise NotImplementedError + @property + @abc.abstractmethod + def _url(self) -> str: + raise NotImplementedError + def _get_subpath_suggestions( self, path_prefix: str = None, limit: int = 1000 ) -> List[str]: @@ -278,24 +298,14 @@ def exists(self, path: str) -> bool: verify_type("path", path, str) return self.get_attribute(path) is not None + @ensure_not_stopped def pop(self, path: str, wait: bool = False) -> None: verify_type("path", path, str) - with self._lock: - 
self._pop_impl(parse_path(path), wait) + self._get_root_handler().pop(path, wait) def _pop_impl(self, parsed_path: List[str], wait: bool): - attribute = self._structure.get(parsed_path) - if isinstance(attribute, NamespaceAttr): - self._pop_namespace(attribute, wait) - else: - self._structure.pop(parsed_path) - self._op_processor.enqueue_operation(DeleteAttribute(parsed_path), wait) - - def _pop_namespace(self, namespace: NamespaceAttr, wait: bool): - children = list(namespace) - for key in children: - sub_attr_path = namespace._path + [key] # pylint: disable=protected-access - self._pop_impl(sub_attr_path, wait) + self._structure.pop(parsed_path) + self._op_processor.enqueue_operation(DeleteAttribute(parsed_path), wait) def lock(self) -> threading.RLock: return self._lock @@ -323,7 +333,14 @@ def _define_attribute(self, _path: List[str], _type: AttributeType): def _get_root_handler(self): return Handler(self, "") + def get_url(self) -> str: + """Returns the URL that can be accessed within the browser""" + return self._url + def _startup(self, debug_mode): + if not debug_mode: + click.echo(self.get_url()) + self.start() if not debug_mode: @@ -339,3 +356,16 @@ def _startup(self, debug_mode): def _shutdown_hook(self): self.stop() + + def _fetch_entries(self, child_type: ContainerType, query: NQLQuery) -> Table: + leaderboard_entries = self._backend.search_leaderboard_entries( + project_id=self._project_id, + types=[child_type], + query=query, + ) + + return Table( + backend=self._backend, + container_type=child_type, + entries=leaderboard_entries, + ) diff --git a/neptune/new/metadata_containers/metadata_containers_table.py b/neptune/new/metadata_containers/metadata_containers_table.py new file mode 100644 index 000000000..9569c23f4 --- /dev/null +++ b/neptune/new/metadata_containers/metadata_containers_table.py @@ -0,0 +1,248 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import logging +from datetime import datetime +from typing import Any, List, Dict, Optional, Union + +from neptune.new.exceptions import MetadataInconsistency +from neptune.new.internal.backends.api_model import ( + LeaderboardEntry, + AttributeWithProperties, + AttributeType, +) +from neptune.new.internal.backends.neptune_backend import NeptuneBackend +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.utils.paths import join_paths, parse_path + +logger = logging.getLogger(__name__) + + +class TableEntry: + def __init__( + self, + backend: NeptuneBackend, + container_type: ContainerType, + _id: str, + attributes: List[AttributeWithProperties], + ): + self._backend = backend + self._container_type = container_type + self._id = _id + self._attributes = attributes + + def __getitem__(self, path: str) -> "LeaderboardHandler": + return LeaderboardHandler(table_entry=self, path=path) + + def get_attribute_type(self, path: str) -> AttributeType: + for attr in self._attributes: + if attr.path == path: + return attr.type + raise ValueError("Could not find {} attribute".format(path)) + + def get_attribute_value(self, path: str) -> Any: + for attr in self._attributes: + if attr.path == path: + _type = attr.type + if _type == AttributeType.RUN_STATE: + return attr.properties.value + if _type in ( + AttributeType.FLOAT, + AttributeType.INT, + AttributeType.BOOL, + AttributeType.STRING, + 
AttributeType.DATETIME, + ): + return attr.properties.value + if ( + _type == AttributeType.FLOAT_SERIES + or _type == AttributeType.STRING_SERIES + ): + return attr.properties.last + if _type == AttributeType.IMAGE_SERIES: + raise MetadataInconsistency("Cannot get value for image series.") + if _type == AttributeType.FILE: + raise MetadataInconsistency( + "Cannot get value for file attribute. Use download() instead." + ) + if _type == AttributeType.FILE_SET: + raise MetadataInconsistency( + "Cannot get value for file set attribute. Use download() instead." + ) + if _type == AttributeType.STRING_SET: + return set(attr.properties.values) + if _type == AttributeType.GIT_REF: + return attr.properties.commit.commitId + if _type == AttributeType.NOTEBOOK_REF: + return attr.properties.notebookName + if _type == AttributeType.ARTIFACT: + return attr.properties.hash + logger.error( + "Attribute type %s not supported in this version, yielding None. Recommended client upgrade.", + _type, + ) + return None + raise ValueError("Could not find {} attribute".format(path)) + + def download_file_attribute(self, path: str, destination: Optional[str]): + for attr in self._attributes: + if attr.path == path: + _type = attr.type + if _type == AttributeType.FILE: + self._backend.download_file( + container_id=self._id, + container_type=self._container_type, + path=parse_path(path), + destination=destination, + ) + return + raise MetadataInconsistency( + "Cannot download file from attribute of type {}".format(_type) + ) + raise ValueError("Could not find {} attribute".format(path)) + + def download_file_set_attribute(self, path: str, destination: Optional[str]): + for attr in self._attributes: + if attr.path == path: + _type = attr.type + if _type == AttributeType.FILE_SET: + self._backend.download_file_set( + container_id=self._id, + container_type=self._container_type, + path=parse_path(path), + destination=destination, + ) + return + raise MetadataInconsistency( + "Cannot download ZIP 
archive from attribute of type {}".format( + _type + ) + ) + raise ValueError("Could not find {} attribute".format(path)) + + +class LeaderboardHandler: + def __init__(self, table_entry: TableEntry, path: str): + self._table_entry = table_entry + self._path = path + + def __getitem__(self, path: str) -> "LeaderboardHandler": + return LeaderboardHandler( + table_entry=self._table_entry, path=join_paths(self._path, path) + ) + + def get(self): + return self._table_entry.get_attribute_value(path=self._path) + + def download(self, destination: Optional[str]): + attr_type = self._table_entry.get_attribute_type(self._path) + if attr_type == AttributeType.FILE: + return self._table_entry.download_file_attribute(self._path, destination) + elif attr_type == AttributeType.FILE_SET: + return self._table_entry.download_file_set_attribute( + path=self._path, destination=destination + ) + raise MetadataInconsistency( + "Cannot download file from attribute of type {}".format(attr_type) + ) + + +class Table: + def __init__( + self, + backend: NeptuneBackend, + container_type: ContainerType, + entries: List[LeaderboardEntry], + ): + self._backend = backend + self._entries = entries + self._container_type = container_type + + def to_rows(self) -> List[TableEntry]: + return [ + TableEntry( + backend=self._backend, + container_type=self._container_type, + _id=e.id, + attributes=e.attributes, + ) + for e in self._entries + ] + + def to_pandas(self): + # pylint:disable=import-outside-toplevel + import pandas as pd + + def make_attribute_value( + attribute: AttributeWithProperties, + ) -> Optional[Union[str, float, datetime]]: + _type = attribute.type + _properties = attribute.properties + if _type == AttributeType.RUN_STATE: + return _properties.value + if _type in ( + AttributeType.FLOAT, + AttributeType.INT, + AttributeType.BOOL, + AttributeType.STRING, + AttributeType.DATETIME, + ): + return _properties.value + if ( + _type == AttributeType.FLOAT_SERIES + or _type == 
AttributeType.STRING_SERIES + ): + return _properties.last + if _type == AttributeType.IMAGE_SERIES: + return None + if _type == AttributeType.FILE or _type == AttributeType.FILE_SET: + return None + if _type == AttributeType.STRING_SET: + return ",".join(_properties.values) + if _type == AttributeType.GIT_REF: + return _properties.commit.commitId + if _type == AttributeType.NOTEBOOK_REF: + return _properties.notebookName + if _type == AttributeType.ARTIFACT: + return _properties.hash + logger.error( + "Attribute type %s not supported in this version, yielding None. Recommended client upgrade.", + _type, + ) + return None + + def make_row( + entry: LeaderboardEntry, + ) -> Dict[str, Optional[Union[str, float, datetime]]]: + row: Dict[str, Union[str, float, datetime]] = dict() + for attr in entry.attributes: + value = make_attribute_value(attr) + if value is not None: + row[attr.path] = value + return row + + def sort_key(attr): + domain = attr.split("/")[0] + if domain == "sys": + return 0, attr + if domain == "monitoring": + return 2, attr + return 1, attr + + rows = dict((n, make_row(entry)) for (n, entry) in enumerate(self._entries)) + + df = pd.DataFrame.from_dict(data=rows, orient="index") + df = df.reindex(sorted(df.columns, key=sort_key), axis="columns") + return df diff --git a/neptune/new/metadata_containers/model.py b/neptune/new/metadata_containers/model.py new file mode 100644 index 000000000..a4c73afab --- /dev/null +++ b/neptune/new/metadata_containers/model.py @@ -0,0 +1,71 @@ +# +# Copyright (c) 2021, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from neptune.new.metadata_containers import MetadataContainer +from neptune.new.internal.container_type import ContainerType +from neptune.new.metadata_containers.metadata_containers_table import Table +from neptune.new.internal.backends.nql import ( + NQLQueryAttribute, + NQLAttributeOperator, + NQLAttributeType, +) + + +class Model(MetadataContainer): + """A class for managing a Neptune model and retrieving information from it. + + You may also want to check `Model docs page`_. + + .. _Model docs page: + https://docs.neptune.ai/api-reference/model + """ + + container_type = ContainerType.MODEL + + @property + def _docs_url_stop(self) -> str: + return "https://docs.neptune.ai/api-reference/model#.stop" + + @property + def _label(self) -> str: + return self._sys_id + + @property + def _url(self) -> str: + return self._backend.get_model_url( + model_id=self._id, + workspace=self._workspace, + project_name=self._project_name, + sys_id=self._sys_id, + ) + + def fetch_model_versions_table(self) -> Table: + """Retrieve all model versions of the given model. + + Returns: + ``Table``: object containing experiments matching the specified criteria. + + Use `.to_pandas()` to convert it to Pandas `DataFrame`. 
+ """ + return MetadataContainer._fetch_entries( + self, + child_type=ContainerType.MODEL_VERSION, + query=NQLQueryAttribute( + name="sys/model_id", + value=self._sys_id, + operator=NQLAttributeOperator.EQUALS, + type=NQLAttributeType.STRING, + ), + ) diff --git a/neptune/new/metadata_containers/model_version.py b/neptune/new/metadata_containers/model_version.py new file mode 100644 index 000000000..f18420629 --- /dev/null +++ b/neptune/new/metadata_containers/model_version.py @@ -0,0 +1,73 @@ +# +# Copyright (c) 2021, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from neptune.new.metadata_containers import MetadataContainer +from neptune.new.attributes.constants import SYSTEM_STAGE_ATTRIBUTE_PATH +from neptune.new.exceptions import NeptuneOfflineModeChangeStageException +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.operation_processors.offline_operation_processor import ( + OfflineOperationProcessor, +) +from neptune.new.types.model_version_stage import ModelVersionStage + + +class ModelVersion(MetadataContainer): + """A class for managing a Neptune model version and retrieving information from it. + + You may also want to check `ModelVersion docs page`_. + + .. 
_ModelVersion docs page: + https://docs.neptune.ai/api-reference/model-version + """ + + container_type = ContainerType.MODEL_VERSION + + @property + def _docs_url_stop(self) -> str: + return "https://docs.neptune.ai/api-reference/model-version#.stop" + + @property + def _label(self) -> str: + return self._sys_id + + @property + def _url(self) -> str: + return self._backend.get_model_version_url( + model_version_id=self._id, + workspace=self._workspace, + project_name=self._project_name, + sys_id=self._sys_id, + model_id=self["sys/model_id"].fetch(), + ) + + def change_stage(self, stage: str): + mapped_stage = ModelVersionStage(stage) + + if isinstance(self._op_processor, OfflineOperationProcessor): + raise NeptuneOfflineModeChangeStageException() + + self.wait() + + with self.lock(): + attr = self.get_attribute(SYSTEM_STAGE_ATTRIBUTE_PATH) + # We are sure that such attribute exists, because + # SYSTEM_STAGE_ATTRIBUTE_PATH is set by default on ModelVersion creation + assert ( + attr is not None + ), f"No {SYSTEM_STAGE_ATTRIBUTE_PATH} found in model version" + attr.process_assignment( + value=mapped_stage.value, + wait=True, + ) diff --git a/neptune/new/metadata_containers/project.py b/neptune/new/metadata_containers/project.py new file mode 100644 index 000000000..be84fb7ac --- /dev/null +++ b/neptune/new/metadata_containers/project.py @@ -0,0 +1,394 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#
import threading
from typing import Iterable, Union, Optional, Dict, Any

from neptune.new.internal.id_formats import UniqueId, SysId
from neptune.new.metadata_containers import MetadataContainer
from neptune.new.internal.backends.neptune_backend import NeptuneBackend
from neptune.new.internal.backends.nql import (
    NQLAggregator,
    NQLQueryAggregate,
    NQLQueryAttribute,
    NQLAttributeOperator,
    NQLAttributeType,
)
from neptune.new.internal.background_job import BackgroundJob
from neptune.new.internal.container_type import ContainerType
from neptune.new.internal.operation_processors.operation_processor import (
    OperationProcessor,
)
from neptune.new.internal.utils import as_list
from neptune.new.metadata_containers.metadata_containers_table import Table


class Project(MetadataContainer):
    """A class for managing a Neptune project and retrieving information from it.

    You may also want to check `Project docs page`_.

    .. _Project docs page:
       https://docs.neptune.ai/api-reference/project
    """

    container_type = ContainerType.PROJECT

    def __init__(
        self,
        *,
        id_: UniqueId,
        backend: NeptuneBackend,
        op_processor: OperationProcessor,
        background_job: BackgroundJob,
        lock: threading.RLock,
        workspace: str,
        project_name: str,
        sys_id: SysId,
    ):
        super().__init__(
            id_=id_,
            backend=backend,
            op_processor=op_processor,
            background_job=background_job,
            lock=lock,
            # A project is its own parent: its id doubles as the project id.
            project_id=id_,
            project_name=project_name,
            workspace=workspace,
            sys_id=sys_id,
        )

    @property
    def _docs_url_stop(self) -> str:
        return "https://docs.neptune.ai/api-reference/project#.stop"

    @property
    def _label(self) -> str:
        return f"{self._workspace}/{self._project_name}"

    @property
    def _url(self) -> str:
        return self._backend.get_project_url(
            project_id=self._id,
            workspace=self._workspace,
            project_name=self._project_name,
        )

    @staticmethod
    def _build_criterion(
        name: str,
        attribute_type: NQLAttributeType,
        operator: NQLAttributeOperator,
        values: Iterable[str],
        aggregator: NQLAggregator,
    ) -> NQLQueryAggregate:
        """Build one filter criterion: `name <operator> value` for every value, joined by `aggregator`."""
        return NQLQueryAggregate(
            items=[
                NQLQueryAttribute(
                    name=name,
                    type=attribute_type,
                    operator=operator,
                    value=value,
                )
                for value in values
            ],
            aggregator=aggregator,
        )

    # pylint:disable=redefined-builtin
    def fetch_runs_table(
        self,
        id: Optional[Union[str, Iterable[str]]] = None,
        state: Optional[Union[str, Iterable[str]]] = None,
        owner: Optional[Union[str, Iterable[str]]] = None,
        tag: Optional[Union[str, Iterable[str]]] = None,
    ) -> Table:
        """Retrieve runs matching the specified criteria.

        All parameters are optional, each of them specifies a single criterion.
        Only runs matching all of the criteria will be returned.

        Args:
            id (str or list of str, optional): A run's id or list of ids.
                E.g. `'SAN-1'` or `['SAN-1', 'SAN-2']`.
                Matching any element of the list is sufficient to pass the criterion.
                Defaults to `None`.
            state (str or list of str, optional): A run's state or list of states.
                E.g. `'running'` or `['idle', 'running']`.
                Possible values: 'idle', 'running'.
                Defaults to `None`.
                Matching any element of the list is sufficient to pass the criterion.
            owner (str or list of str, optional): Username of the run's owner or a list of owners.
                E.g. 'josh' or ['frederic', 'josh'].
                The user who created the tracked run is an owner.
                Defaults to `None`.
                Matching any element of the list is sufficient to pass the criterion.
            tag (str or list of str, optional): A run's tag or list of tags.
                E.g. `'lightGBM'` or ['pytorch', 'cycleLR'].
                Defaults to `None`.
                Only runs that have all specified tags will match this criterion.

        Returns:
            ``Table``: object containing runs matching the specified criteria.

            Use `.to_pandas()` to convert it to Pandas `DataFrame`.

        Examples:
            >>> import neptune.new as neptune

            >>> # Fetch project 'jackie/sandbox'
            ... project = neptune.get_project(name='jackie/sandbox')

            >>> # Fetch all Runs metadata as Pandas DataFrame
            ... runs_table_df = project.fetch_runs_table().to_pandas()

            >>> # Sort runs by creation time
            ... runs_table_df = runs_table_df.sort_values(by='sys/creation_time', ascending=False)

            >>> # Extract the last runs id
            ... last_run_id = runs_table_df['sys/id'].values[0]

            You can also filter the runs table by state, owner or tag or a combination:

            >>> # Fetch only inactive runs
            ... runs_table_df = project.fetch_runs_table(state='idle').to_pandas()

            >>> # Fetch only runs created by CI service
            ... runs_table_df = project.fetch_runs_table(owner='my_company_ci_service').to_pandas()

            >>> # Fetch only runs that have both 'Exploration' and 'Optuna' tag
            ... runs_table_df = project.fetch_runs_table(tag=['Exploration', 'Optuna']).to_pandas()

            >>> # You can combine conditions. Runs satisfying all conditions will be fetched
            ... runs_table_df = project.fetch_runs_table(state='idle', tag='Exploration').to_pandas()

        You may also want to check `fetch_runs_table docs page`_.

        .. _fetch_runs_table docs page:
           https://docs.neptune.ai/api-reference/project#fetch_runs_table
        """
        ids = as_list("id", id)
        states = as_list("state", state)
        owners = as_list("owner", owner)
        tags = as_list("tag", tag)

        query_items = []

        # A criterion is added only when its argument was given and non-empty.
        if ids:
            query_items.append(
                self._build_criterion(
                    name="sys/id",
                    attribute_type=NQLAttributeType.STRING,
                    operator=NQLAttributeOperator.EQUALS,
                    values=ids,
                    aggregator=NQLAggregator.OR,
                )
            )

        if states:
            query_items.append(
                self._build_criterion(
                    name="sys/state",
                    attribute_type=NQLAttributeType.EXPERIMENT_STATE,
                    operator=NQLAttributeOperator.EQUALS,
                    values=states,
                    aggregator=NQLAggregator.OR,
                )
            )

        if owners:
            query_items.append(
                self._build_criterion(
                    name="sys/owner",
                    attribute_type=NQLAttributeType.STRING,
                    operator=NQLAttributeOperator.EQUALS,
                    values=owners,
                    aggregator=NQLAggregator.OR,
                )
            )

        if tags:
            # Unlike the other criteria, a run must carry *every* requested tag
            # (see the `tag` contract above), so the per-tag checks are AND-ed.
            query_items.append(
                self._build_criterion(
                    name="sys/tags",
                    attribute_type=NQLAttributeType.STRING_SET,
                    operator=NQLAttributeOperator.CONTAINS,
                    values=tags,
                    aggregator=NQLAggregator.AND,
                )
            )

        # Separate criteria are always combined with AND.
        query = NQLQueryAggregate(items=query_items, aggregator=NQLAggregator.AND)

        return super()._fetch_entries(child_type=ContainerType.RUN, query=query)

    def assign(self, value, wait: bool = False) -> None:
        """Assign values to multiple fields from a dictionary.

        You can use this method to log multiple pieces of information with one command.

        Args:
            value (dict): A dictionary with values to assign, where keys become the paths of the fields.
                The dictionary can be nested - in such case the path will be a combination of all keys.
            wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server.
                This makes the call synchronous. Defaults to `False`.

        Examples:
            >>> import neptune.new as neptune
            >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

            >>> # Assign multiple fields from a dictionary
            ... general_info = {"brief": URL_TO_PROJECT_BRIEF, "deadline": "2049-06-30"}
            >>> project["general"] = general_info

            >>> # You can always log explicitly parameters one by one
            ... project["general/brief"] = URL_TO_PROJECT_BRIEF
            >>> project["general/deadline"] = "2049-06-30"

            >>> # Dictionaries can be nested
            ... general_info = {"brief": {"url": URL_TO_PROJECT_BRIEF}}
            >>> project["general"] = general_info
            >>> # This will log the url under path "general/brief/url"

        You may also want to check `assign docs page`_.

        .. _assign docs page:
           https://docs.neptune.ai/api-reference/project#.assign
        """
        return super().assign(value=value, wait=wait)

    def fetch(self) -> dict:
        """Fetch values of all non-File Atom fields as a dictionary.

        The result will preserve the hierarchical structure of the project's metadata
        but will contain only non-File Atom fields.

        Returns:
            `dict` containing all non-File Atom fields values.

        Examples:
            >>> import neptune.new as neptune
            >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

            >>> # Fetch all the project metrics
            >>> project_metrics = project["metrics"].fetch()

        You may also want to check `fetch docs page`_.

        .. _fetch docs page:
           https://docs.neptune.ai/api-reference/project#.fetch
        """
        return super().fetch()

    def stop(self, seconds: Optional[Union[float, int]] = None) -> None:
        """Stops the connection to the project and kills the synchronization thread.

        `.stop()` will be automatically called when a script that initialized the connection finishes
        or on the destruction of Neptune context.

        When using Neptune with Jupyter notebooks it's a good practice to stop the connection manually as it
        will be stopped automatically only when the Jupyter kernel stops.

        Args:
            seconds (int or float, optional): Seconds to wait for all tracking calls to finish
                before stopping the connection.
                If `None` will wait for all tracking calls to finish. Defaults to `None`.

        Examples:
            If you are initializing the connection from a script you don't need to call `.stop()`:

            >>> import neptune.new as neptune
            >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

            >>> # Your code
            ... pass
            ... # If you are executing Python script .stop()
            ... # is automatically called at the end for every Neptune object

            If you are initializing multiple connections from one script it is a good practice
            to .stop() the unneeded connections. You can also use Context Managers - Neptune
            will automatically call .stop() on the destruction of Project context:

            >>> import neptune.new as neptune

            >>> # If you are initializing multiple connections from the same script
            ... # stop the connection manually once not needed
            ... for project_name in projects:
            ...     project = neptune.init_project(name=project_name)
            ...     # Your code
            ...     pass
            ...     project.stop()

            >>> # You can also use with statement and context manager
            ... for project_name in projects:
            ...     with neptune.init_project(name=project_name) as project:
            ...         # Your code
            ...         pass
            ...         # .stop() is automatically called
            ...         # when code execution exits the with statement

        .. warning::
            If you are using Jupyter notebooks for connecting to a project you need to manually invoke `.stop()`
            once the connection is not needed.

        You may also want to check `stop docs page`_.

        .. _stop docs page:
           https://docs.neptune.ai/api-reference/project#.stop
        """
        return super().stop(seconds=seconds)

    def get_structure(self) -> Dict[str, Any]:
        """Returns a project's metadata structure in form of a dictionary.

        This method can be used to traverse the project's metadata structure programmatically
        when using Neptune in automated workflows.

        .. danger::
            The returned object is a shallow copy of an internal structure.
            Any modifications to it may result in tracking malfunction.

        Returns:
            ``dict``: with the project's metadata structure.
        """
        return super().get_structure()

    def print_structure(self) -> None:
        """Pretty prints the structure of the project's metadata.

        Paths are ordered lexicographically and the whole structure is neatly colored.
        """
        return super().print_structure()

    def pop(self, path: str, wait: bool = False) -> None:
        """Removes the field or whole namespace stored under the path completely and all data associated with them.

        Args:
            path (str): Path of the field or namespace to be removed.
            wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server.
                This makes the call synchronous. Defaults to `False`.

        Examples:
            >>> import neptune.new as neptune
            >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT")

            >>> # Delete a field along with its data
            ... project.pop("datasets/v0.4")

            >>> # .pop() can be invoked directly on fields and namespaces

            >>> project['parameters/learning_rate'] = 0.3

            >>> # Following line
            ... project.pop("datasets/v0.4")
            >>> # is equivalent to this line
            ... project["datasets/v0.4"].pop()
            >>> # or this line
            ... project["datasets"].pop("v0.4")

            >>> # You can also delete in batch whole namespace
            ... project["datasets"].pop()

        You may also want to check `pop docs page`_.

        .. _pop docs page:
           https://docs.neptune.ai/api-reference/project#.pop
        """
        return super().pop(path=path, wait=wait)

    def wait(self, disk_only: bool = False) -> None:
        """Wait for all the tracking calls to finish.

        Args:
            disk_only (bool, optional, default is False): If `True` the process will only wait for data to be saved
                locally from memory, but will not wait for them to reach Neptune servers.
                Defaults to `False`.

        You may also want to check `wait docs page`_.

        .. _wait docs page:
           https://docs.neptune.ai/api-reference/project#.wait
        """
        return super().wait(disk_only=disk_only)

    def sync(self, wait: bool = True) -> None:
        """Synchronizes local representation of the project with Neptune servers.

        Args:
            wait (bool, optional, default is True): If `True` the process will only wait for data to be saved
                locally from memory, but will not wait for them to reach Neptune servers.
                Defaults to `True`.

        You may also want to check `sync docs page`_.

        .. _sync docs page:
           https://docs.neptune.ai/api-reference/project#.sync
        """
        # NOTE(review): the `wait` description above is word-for-word the
        # `disk_only` text of `wait()` - confirm it against the base-class sync().
        return super().sync(wait=wait)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import threading
from typing import Any, Dict, Optional, Union

from neptune.new.internal.id_formats import UniqueId, SysId
from neptune.new.metadata_containers import MetadataContainer
from neptune.new.internal.backends.neptune_backend import NeptuneBackend
from neptune.new.internal.background_job import BackgroundJob
from neptune.new.internal.container_type import ContainerType
from neptune.new.internal.operation_processors.operation_processor import (
    OperationProcessor,
)


class Run(MetadataContainer):
    """A Run in Neptune is a representation of all metadata that you log to Neptune.

    A run begins when you start tracking with `neptune.init()` and ends when the script finishes
    or when you explicitly stop it with `.stop()`.

    You can log many ML metadata types, including:
        * metrics
        * losses
        * model weights
        * images
        * interactive charts
        * predictions
        * and much more

    Examples:
        >>> import neptune.new as neptune

        >>> # Create new experiment
        ... run = neptune.init('my_workspace/my_project')

        >>> # Log parameters
        ... params = {'max_epochs': 10, 'optimizer': 'Adam'}
        ... run['parameters'] = params

        >>> # Log metadata
        ... run['train/metric_name'].log(metric_value)
        >>> run['predictions'].log(image)
        >>> run['model'].upload(path_to_model)

        >>> # Log whatever else you want
        ... pass

        >>> # Stop tracking and clean up
        ... run.stop()

    You may also want to check `Run docs page`_.

    .. _Run docs page:
       https://docs.neptune.ai/api-reference/run
    """

    last_run = None  # "static" instance of recently created Run

    container_type = ContainerType.RUN

    # Method names listed as legacy; this class only declares the tuple.
    LEGACY_METHODS = (
        "create_experiment",
        "send_metric",
        "log_metric",
        "send_text",
        "log_text",
        "send_image",
        "log_image",
        "send_artifact",
        "log_artifact",
        "delete_artifacts",
        "download_artifact",
        "download_sources",
        "download_artifacts",
        "reset_log",
        "get_parameters",
        "get_properties",
        "set_property",
        "remove_property",
        "get_hardware_utilization",
        "get_numeric_channels_values",
    )

    def __init__(
        self,
        *,
        id_: UniqueId,
        backend: NeptuneBackend,
        op_processor: OperationProcessor,
        background_job: BackgroundJob,
        lock: threading.RLock,
        workspace: str,
        project_name: str,
        sys_id: SysId,
        project_id: UniqueId,
        monitoring_namespace: str = "monitoring",
    ):
        super().__init__(
            id_=id_,
            backend=backend,
            op_processor=op_processor,
            background_job=background_job,
            lock=lock,
            project_id=project_id,
            project_name=project_name,
            workspace=workspace,
            sys_id=sys_id,
        )
        self.monitoring_namespace = monitoring_namespace

        # Remember the most recently constructed run on the class itself.
        Run.last_run = self

    @property
    def _docs_url_stop(self) -> str:
        return "https://docs.neptune.ai/api-reference/run#.stop"

    @property
    def _label(self) -> str:
        return self._sys_id

    def get_run_url(self) -> str:
        """Returns the URL the run can be accessed with in the browser"""
        return self._url

    @property
    def _url(self) -> str:
        return self._backend.get_run_url(
            run_id=self._id,
            workspace=self._workspace,
            project_name=self._project_name,
            sys_id=self._sys_id,
        )

    @property
    def _short_id(self) -> str:
        return self._sys_id

    def assign(self, value, wait: bool = False) -> None:
        """Assign values to multiple fields from a dictionary.

        You can use this method to quickly log all run's parameters.

        Args:
            value (dict): A dictionary with values to assign, where keys become the paths of the fields.
                The dictionary can be nested - in such case the path will be a combination of all keys.
            wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server.
                This makes the call synchronous. Defaults to `False`.

        Examples:
            >>> import neptune.new as neptune
            >>> run = neptune.init_run()

            >>> # Assign multiple fields from a dictionary
            ... params = {"max_epochs": 10, "optimizer": "Adam"}
            >>> run["parameters"] = params

            >>> # You can always log explicitly parameters one by one
            ... run["parameters/max_epochs"] = 10
            >>> run["parameters/optimizer"] = "Adam"

            >>> # Dictionaries can be nested
            ... params = {"train": {"max_epochs": 10}}
            >>> run["parameters"] = params
            >>> # This will log 10 under path "parameters/train/max_epochs"

        You may also want to check `assign docs page`_.

        .. _assign docs page:
           https://docs.neptune.ai/api-reference/run#.assign
        """
        return super().assign(value=value, wait=wait)

    def fetch(self) -> dict:
        """Fetch values of all non-File Atom fields as a dictionary.

        The result will preserve the hierarchical structure of the run's metadata, but will contain only non-File Atom
        fields.
        You can use this method to quickly retrieve previous run's parameters.

        Returns:
            `dict` containing all non-File Atom fields values.

        Examples:
            >>> import neptune.new as neptune
            >>> resumed_run = neptune.init(run="HEL-3")
            >>> params = resumed_run['model/parameters'].fetch()

            >>> run_data = resumed_run.fetch()

            >>> print(run_data)
            >>> # this will print out all Atom attributes stored in run as a dict

        You may also want to check `fetch docs page`_.

        .. _fetch docs page:
           https://docs.neptune.ai/api-reference/run#.fetch
        """
        return super().fetch()

    def stop(self, seconds: Optional[Union[float, int]] = None) -> None:
        """Stops the tracked run and kills the synchronization thread.

        `.stop()` will be automatically called when a script that created the run finishes or on the destruction
        of Neptune context.

        When using Neptune with Jupyter notebooks it's a good practice to stop the tracked run manually as it
        will be stopped automatically only when the Jupyter kernel stops.

        Args:
            seconds (int or float, optional): Seconds to wait for all tracking calls to finish
                before stopping the tracked run.
                If `None` will wait for all tracking calls to finish. Defaults to `None`.

        Examples:
            If you are creating tracked runs from the script you don't need to call `.stop()`:

            >>> import neptune.new as neptune
            >>> run = neptune.init()

            >>> # Your training or monitoring code
            ... pass
            ... # If you are executing Python script .stop()
            ... # is automatically called at the end for every run

            If you are performing multiple training jobs from one script one after the other it is a good practice
            to `.stop()` the finished tracked runs as every open run keeps an open connection with Neptune,
            monitors hardware usage, etc. You can also use Context Managers - Neptune will automatically call `.stop()`
            on the destruction of Run context:

            >>> import neptune.new as neptune

            >>> # If you are running consecutive training jobs from the same script
            ... # stop the tracked runs manually at the end of single training job
            ... for config in configs:
            ...     run = neptune.init()
            ...     # Your training or monitoring code
            ...     pass
            ...     run.stop()

            >>> # You can also use with statement and context manager
            ... for config in configs:
            ...     with neptune.init() as run:
            ...         # Your training or monitoring code
            ...         pass
            ...         # .stop() is automatically called
            ...         # when code execution exits the with statement

        .. warning::
            If you are using Jupyter notebooks for creating your runs you need to manually invoke `.stop()` once the
            training and evaluation is done.

        You may also want to check `stop docs page`_.

        .. _stop docs page:
           https://docs.neptune.ai/api-reference/run#.stop
        """
        return super().stop(seconds=seconds)

    def get_structure(self) -> Dict[str, Any]:
        """Returns a run's metadata structure in form of a dictionary.

        This method can be used to traverse the run's metadata structure programmatically
        when using Neptune in automated workflows.

        .. danger::
            The returned object is a deep copy of an internal run's structure.

        Returns:
            ``dict``: with the run's metadata structure.
        """
        # NOTE(review): Project.get_structure documents a *shallow* copy for the
        # same base-class call - confirm which description is correct.
        return super().get_structure()

    def print_structure(self) -> None:
        """Pretty prints the structure of the run's metadata.

        Paths are ordered lexicographically and the whole structure is neatly colored.
        """
        return super().print_structure()

    def pop(self, path: str, wait: bool = False) -> None:
        """Removes the field stored under the path completely and all data associated with it.

        Args:
            path (str): Path of the field to be removed.
            wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server.
                This makes the call synchronous. Defaults to `False`.

        Examples:
            >>> import neptune.new as neptune
            >>> run = neptune.init()

            >>> run['parameters/learninggg_rata'] = 0.3

            >>> # Delete a field along with its data
            ... run.pop('parameters/learninggg_rata')

            >>> run['parameters/learning_rate'] = 0.3

            >>> # Training finished
            ... run['trained_model'].upload('model.pt')
            >>> # 'model_checkpoint' is a File field
            ... run.pop('model_checkpoint')

        You may also want to check `pop docs page`_.

        .. _pop docs page:
           https://docs.neptune.ai/api-reference/run#.pop
        """
        return super().pop(path=path, wait=wait)

    def wait(self, disk_only: bool = False) -> None:
        """Wait for all the tracking calls to finish.

        Args:
            disk_only (bool, optional, default is False): If `True` the process will only wait for data to be saved
                locally from memory, but will not wait for them to reach Neptune servers.
                Defaults to `False`.

        You may also want to check `wait docs page`_.

        .. _wait docs page:
           https://docs.neptune.ai/api-reference/run#.wait
        """
        return super().wait(disk_only=disk_only)

    def sync(self, wait: bool = True) -> None:
        """Synchronizes local representation of the run with Neptune servers.

        Args:
            wait (bool, optional, default is True): If `True` the process will only wait for data to be saved
                locally from memory, but will not wait for them to reach Neptune servers.
                Defaults to `True`.

        Examples:
            >>> import neptune.new as neptune

            >>> # Connect to a run from Worker #3
            ... worker_id = 3
            >>> run = neptune.init(run='DIST-43', monitoring_namespace='monitoring/{}'.format(worker_id))

            >>> # Try to access logs that were created in meantime by Worker #2
            ... worker_2_status = run['status/2'].fetch()  # Error if this field was created after this script starts

            >>> run.sync()  # Synchronizes local representation with Neptune servers.
            >>> worker_2_status = run['status/2'].fetch()  # No error

        You may also want to check `sync docs page`_.

        .. _sync docs page:
           https://docs.neptune.ai/api-reference/run#.sync
        """
        # NOTE(review): the `wait` description above is word-for-word the
        # `disk_only` text of `wait()` - confirm it against the base-class sync().
        return super().sync(wait=wait)
# -import threading -from typing import Union, Optional, Iterable, Dict, Any -from neptune.new.attribute_container import AttributeContainer -from neptune.new.internal.backends.neptune_backend import NeptuneBackend -from neptune.new.internal.background_job import BackgroundJob -from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.operation_processors.operation_processor import ( - OperationProcessor, -) -from neptune.new.internal.utils import verify_type, verify_collection_type -from neptune.new.runs_table import RunsTable - - -class Project(AttributeContainer): - """A class for managing a Neptune project and retrieving information from it. - - You may also want to check `Project docs page`_. - - .. _Project docs page: - https://docs.neptune.ai/api-reference/project - """ - - container_type = ContainerType.PROJECT - - def __init__( - self, - _id: str, - backend: NeptuneBackend, - op_processor: OperationProcessor, - background_job: BackgroundJob, - lock: threading.RLock, - workspace: str, - project_name: str, - ): - super().__init__( - _id, - backend, - op_processor, - background_job, - lock, - project_id=_id, - project_name=project_name, - workspace=workspace, - ) - - @property - def _docs_url_stop(self) -> str: - return "https://docs.neptune.ai/api-reference/project#.stop" - - @property - def _label(self) -> str: - return f"{self._workspace}/{self._project_name}" - - # pylint:disable=redefined-builtin - def fetch_runs_table( - self, - id: Optional[Union[str, Iterable[str]]] = None, - state: Optional[Union[str, Iterable[str]]] = None, - owner: Optional[Union[str, Iterable[str]]] = None, - tag: Optional[Union[str, Iterable[str]]] = None, - ) -> RunsTable: - """Retrieve runs matching the specified criteria. - - All parameters are optional, each of them specifies a single criterion. - Only runs matching all of the criteria will be returned. - - Args: - id (str or list of str, optional): A run's id or list of ids. - E.g. 
`'SAN-1'` or `['SAN-1', 'SAN-2']`. - Matching any element of the list is sufficient to pass the criterion. - Defaults to `None`. - state (str or list of str, optional): A run's state like or list of states. - E.g. `'running'` or `['idle', 'running']`. - Possible values: 'idle', 'running'. - Defaults to `None`. - Matching any element of the list is sufficient to pass the criterion. - owner (str or list of str, optional): Username of the run's owner or a list of owners. - E.g. 'josh' or ['frederic', 'josh']. - The user who created the tracked run is an owner. - Defaults to `None`. - Matching any element of the list is sufficient to pass the criterion. - tag (str or list of str, optional): An experiment tag or list of tags. - E.g. `'lightGBM'` or ['pytorch', 'cycleLR']. - Defaults to `None`. - Only experiments that have all specified tags will match this criterion. - - Returns: - ``RunsTable``: object containing experiments matching the specified criteria. - - Use `.to_pandas()` to convert it to Pandas `DataFrame`. - - Examples: - >>> import neptune.new as neptune - - >>> # Fetch project 'jackie/sandbox' - ... project = neptune.get_project(name='jackie/sandbox') - - >>> # Fetch all Runs metadata as Pandas DataFrame - ... runs_table_df = project.fetch_runs_table().to_pandas() - - >>> # Sort runs by creation time - ... runs_table_df = runs_table_df.sort_values(by='sys/creation_time', ascending=False) - - >>> # Extract the last runs id - ... last_run_id = runs_table_df['sys/id'].values[0] - - You can also filter the runs table by state, owner or tag or a combination: - - >>> # Fetch only inactive runs - ... runs_table_df = project.fetch_runs_table(state='idle').to_pandas() - - >>> # Fetch only runs created by CI service - ... runs_table_df = project.fetch_runs_table(owner='my_company_ci_service').to_pandas() - - >>> # Fetch only runs that have both 'Exploration' and 'Optuna' tag - ... 
runs_table_df = project.fetch_runs_table(tag=['Exploration', 'Optuna']).to_pandas() - - >>> # You can combine conditions. Runs satisfying all conditions will be fetched - ... runs_table_df = project.fetch_runs_table(state='idle', tag='Exploration').to_pandas() - - You may also want to check `fetch_runs_table docs page`_. - - .. _fetch_runs_table docs page: - https://docs.neptune.ai/api-reference/project#.fetch_runs_table - """ - id = self._as_list("id", id) - state = self._as_list("state", state) - owner = self._as_list("owner", owner) - tags = self._as_list("tag", tag) - - leaderboard_entries = self._backend.get_leaderboard( - self._id, id, state, owner, tags - ) - - return RunsTable(self._backend, leaderboard_entries) - - @staticmethod - def _as_list( - name: str, value: Optional[Union[str, Iterable[str]]] - ) -> Optional[Iterable[str]]: - verify_type(name, value, (type(None), str, Iterable)) - if value is None: - return None - if isinstance(value, str): - return [value] - verify_collection_type(name, value, str) - return value - - def assign(self, value, wait: bool = False) -> None: - """Assign values to multiple fields from a dictionary. - You can use this method to log multiple pieces of information with one command. - - Args: - value (dict): A dictionary with values to assign, where keys become the paths of the fields. - The dictionary can be nested - in such case the path will be a combination of all keys. - wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server. - This makes the call synchronous. Defaults to `False`. - - Examples: - >>> import neptune.new as neptune - >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT") - - >>> # Assign multiple fields from a dictionary - ... general_info = {"brief": URL_TO_PROJECT_BRIEF, "deadline": "2049-06-30"} - >>> project["general"] = general_info - - >>> # You can always log explicitly parameters one by one - ... 
project["general/brief"] = URL_TO_PROJECT_BRIEF - >>> project["general/deadline"] = "2049-06-30" - - >>> # Dictionaries can be nested - ... general_info = {"brief": {"url": URL_TO_PROJECT_BRIEF}} - >>> project["general"] = general_info - >>> # This will log the url under path "general/brief/url" - - You may also want to check `assign docs page`_. - - .. _assign docs page: - https://docs.neptune.ai/api-reference/project#.assign - """ - return AttributeContainer.assign(self, value=value, wait=wait) - - def fetch(self) -> dict: - """Fetch values of all non-File Atom fields as a dictionary. - The result will preserve the hierarchical structure of the projects's metadata - but will contain only non-File Atom fields. - - Returns: - `dict` containing all non-File Atom fields values. - - Examples: - >>> import neptune.new as neptune - >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT") - - >>> # Fetch all the project metrics - >>> project_metrics = project["metrics"].fetch() - - You may also want to check `fetch docs page`_. - - .. _fetch docs page: - https://docs.neptune.ai/api-reference/project#.fetch - """ - return AttributeContainer.fetch(self) - - def stop(self, seconds: Optional[Union[float, int]] = None) -> None: - """Stops the connection to the project and kills the synchronization thread. - - `.stop()` will be automatically called when a script that initialized the connection finishes - or on the destruction of Neptune context. - - When using Neptune with Jupyter notebooks it's a good practice to stop the connection manually as it - will be stopped automatically only when the Jupyter kernel stops. - - Args: - seconds (int or float, optional): Seconds to wait for all tracking calls to finish - before stopping the tracked run. - If `None` will wait for all tracking calls to finish. Defaults to `True`. 
- - Examples: - If you are initializing the connection from a script you don't need to call `.stop()`: - - >>> import neptune.new as neptune - >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT") - - >>> # Your code - ... pass - ... # If you are executing Python script .stop() - ... # is automatically called at the end for every Neptune object - - If you are initializing multiple connection from one script it is a good practice - to .stop() the unneeded connections. You can also use Context Managers - Neptune - will automatically call .stop() on the destruction of Project context: - - >>> import neptune.new as neptune - - >>> # If you are initializing multiple connections from the same script - ... # stop the connection manually once not needed - ... for project_name in projects: - ... project = neptune.init_project(name=project_name) - ... # Your code - ... pass - ... project.stop() - - >>> # You can also use with statement and context manager - ... for project_name in projects: - ... with neptune.init_project(name=project_name) as project: - ... # Your code - ... pass - ... # .stop() is automatically called - ... # when code execution exits the with statement - - .. warning:: - If you are using Jupyter notebooks for connecting to a project you need to manually invoke `.stop()` - once the connection is not needed. - - You may also want to check `stop docs page`_. - - .. _stop docs page: - https://docs.neptune.ai/api-reference/project#.stop - """ - return AttributeContainer.stop(self, seconds=seconds) - - def get_structure(self) -> Dict[str, Any]: - """Returns a project's metadata structure in form of a dictionary. - - This method can be used to traverse the project's metadata structure programmatically - when using Neptune in automated workflows. - - .. danger:: - The returned object is a shallow copy of an internal structure. - Any modifications to it may result in tracking malfunction. - - Returns: - ``dict``: with the project's metadata structure. 
- - """ - return AttributeContainer.get_structure(self) - - def print_structure(self) -> None: - """Pretty prints the structure of the project's metadata. - - Paths are ordered lexicographically and the whole structure is neatly colored. - """ - return AttributeContainer.print_structure(self) - - def pop(self, path: str, wait: bool = False) -> None: - """Removes the field or whole namespace stored under the path completely and all data associated with them. - - Args: - path (str): Path of the field or namespace to be removed. - wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server. - This makes the call synchronous. Defaults to `False`. - - Examples: - >>> import neptune.new as neptune - >>> project = neptune.init_project(name="MY_WORKSPACE/MY_PROJECT") - - >>> # Delete a field along with it's data - ... project.pop("datasets/v0.4") - - >>> # .pop() can be invoked directly on fields and namespaces - - >>> project['parameters/learning_rate'] = 0.3 - - >>> # Following line - ... project.pop("datasets/v0.4") - >>> # is equiavlent to this line - ... project["datasets/v0.4"].pop() - >>> # or this line - ... project["datasets"].pop("v0.4") - - >>> # You can also delete in batch whole namespace - ... project["datasets"].pop() - - You may also want to check `pop docs page`_. - - .. _pop docs page: - https://docs.neptune.ai/api-reference/project#.pop - """ - return AttributeContainer.pop(self, path=path, wait=wait) - - def wait(self, disk_only=False) -> None: - """Wait for all the tracking calls to finish. - - Args: - disk_only (bool, optional, default is False): If `True` the process will only wait for data to be saved - locally from memory, but will not wait for them to reach Neptune servers. - Defaults to `False`. - - You may also want to check `wait docs page`_. - - .. 
_wait docs page: - https://docs.neptune.ai/api-reference/project#.wait - """ - return AttributeContainer.wait(self, disk_only=disk_only) - - def sync(self, wait: bool = True) -> None: - """Synchronizes local representation of the project with Neptune servers. - - Args: - wait (bool, optional, default is True): If `True` the process will only wait for data to be saved - locally from memory, but will not wait for them to reach Neptune servers. - Defaults to `True`. - - You may also want to check `sync docs page`_. - - .. _sync docs page: - https://docs.neptune.ai/api-reference/project#.sync - """ - return AttributeContainer.sync(self, wait=wait) +# backwards compatibility +# pylint: disable=unused-import +from neptune.new.metadata_containers import Project diff --git a/neptune/new/run.py b/neptune/new/run.py index d2e534600..ded8ff29a 100644 --- a/neptune/new/run.py +++ b/neptune/new/run.py @@ -13,22 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import threading -from typing import Union, Optional, Dict, Any - -import click - -from neptune.new.attribute_container import AttributeContainer -from neptune.new.internal.backends.neptune_backend import NeptuneBackend -from neptune.new.internal.background_job import BackgroundJob -from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.operation_processors.operation_processor import ( - OperationProcessor, -) -from neptune.new.internal.state import ContainerState # backwards compatibility # pylint: disable=unused-import + +from neptune.new.metadata_containers import Run +from neptune.new.handler import Handler +from neptune.new.internal.state import ContainerState as RunState from neptune.new.attributes.attribute import Attribute from neptune.new.attributes.namespace import ( NamespaceBuilder, @@ -39,359 +30,9 @@ InactiveRunException, NeptunePossibleLegacyUsageException, ) -from neptune.new.handler import Handler -from neptune.new.internal.backends.api_model import AttributeType -from neptune.new.internal.operation import DeleteAttribute -from neptune.new.internal.run_structure import ContainerStructure as RunStructure -from neptune.new.internal.utils import ( - is_bool, - is_float, - is_float_like, - is_int, - is_string, - is_string_like, - verify_type, - is_dict_like, -) -from neptune.new.internal.utils.paths import parse_path -from neptune.new.internal.value_to_attribute_visitor import ValueToAttributeVisitor from neptune.new.types import Boolean, Integer from neptune.new.types.atoms.datetime import Datetime from neptune.new.types.atoms.float import Float from neptune.new.types.atoms.string import String from neptune.new.types.namespace import Namespace from neptune.new.types.value import Value - -RunState = ContainerState - - -class Run(AttributeContainer): - """A Run in Neptune is a representation of all metadata that you log to Neptune. 
- - Beginning when you start a tracked run with `neptune.init()` and ending when the script finishes - or when you explicitly stop the experiment with `.stop()`. - - You can log many ML metadata types, including: - * metrics - * losses - * model weights - * images - * interactive charts - * predictions - * and much more - - Examples: - >>> import neptune.new as neptune - - >>> # Create new experiment - ... run = neptune.init('my_workspace/my_project') - - >>> # Log parameters - ... params = {'max_epochs': 10, 'optimizer': 'Adam'} - ... run['parameters'] = params - - >>> # Log metadata - ... run['train/metric_name'].log() - >>> run['predictions'].log(image) - >>> run['model'].upload(path_to_model) - - >>> # Log whatever else you want - ... pass - - >>> # Stop tracking and clean up - ... run.stop() - - You may also want to check `Run docs page`_. - - .. _Run docs page: - https://docs.neptune.ai/api-reference/run - """ - - last_run = None # "static" instance of recently created Run - - container_type = ContainerType.RUN - - LEGACY_METHODS = ( - "create_experiment", - "send_metric", - "log_metric", - "send_text", - "log_text", - "send_image", - "log_image", - "send_artifact", - "log_artifact", - "delete_artifacts", - "download_artifact", - "download_sources", - "download_artifacts", - "reset_log", - "get_parameters", - "get_properties", - "set_property", - "remove_property", - "get_hardware_utilization", - "get_numeric_channels_values", - ) - - def __init__( - self, - _id: str, - backend: NeptuneBackend, - op_processor: OperationProcessor, - background_job: BackgroundJob, - lock: threading.RLock, - workspace: str, - project_name: str, - short_id: str, - project_id: str, - monitoring_namespace: str = "monitoring", - ): - super().__init__( - _id, - backend, - op_processor, - background_job, - lock, - project_id, - project_name, - workspace, - ) - self._short_id = short_id - self.monitoring_namespace = monitoring_namespace - - Run.last_run = self - - @property - def 
_label(self) -> str: - return self._short_id - - @property - def _docs_url_stop(self) -> str: - return "https://docs.neptune.ai/api-reference/run#.stop" - - def get_run_url(self) -> str: - """Returns the URL the run can be accessed with in the browser""" - return self._backend.get_run_url( - self._id, self._workspace, self._project_name, self._short_id - ) - - def _startup(self, debug_mode): - if not debug_mode: - click.echo(self.get_run_url()) - super()._startup(debug_mode) - - def assign(self, value, wait: bool = False) -> None: - """Assign values to multiple fields from a dictionary. - You can use this method to quickly log all run's parameters. - - Args: - value (dict): A dictionary with values to assign, where keys become the paths of the fields. - The dictionary can be nested - in such case the path will be a combination of all keys. - wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server. - This makes the call synchronous. Defaults to `False`. - - Examples: - >>> import neptune.new as neptune - >>> run = neptune.init_run() - - >>> # Assign multiple fields from a dictionary - ... params = {"max_epochs": 10, "optimizer": "Adam"} - >>> run["parameters"] = params - - >>> # You can always log explicitly parameters one by one - ... run["parameters/max_epochs"] = 10 - >>> run["parameters/optimizer"] = "Adam" - - >>> # Dictionaries can be nested - ... params = {"train": {"max_epochs": 10}} - >>> run["parameters"] = params - >>> # This will log 10 under path "parameters/train/max_epochs" - - You may also want to check `assign docs page`_. - - .. _assign docs page: - https://docs.neptune.ai/api-reference/run#.assign - """ - return AttributeContainer.assign(self, value=value, wait=wait) - - def fetch(self) -> dict: - """Fetch values of all non-File Atom fields as a dictionary. - The result will preserve the hierarchical structure of the run's metadata, but will contain only non-File Atom - fields. 
- You can use this method to quickly retrieve previous run's parameters. - - Returns: - `dict` containing all non-File Atom fields values. - - Examples: - >>> import neptune.new as neptune - >>> resumed_run = neptune.init(run="HEL-3") - >>> params = resumed_run['model/parameters'].fetch() - - >>> run_data = resumed_run.fetch() - - >>> print(run_data) - >>> # this will print out all Atom attributes stored in run as a dict - - You may also want to check `fetch docs page`_. - - .. _fetch docs page: - https://docs.neptune.ai/api-reference/run#.fetch - """ - return AttributeContainer.fetch(self) - - def stop(self, seconds: Optional[Union[float, int]] = None) -> None: - """Stops the tracked run and kills the synchronization thread. - - `.stop()` will be automatically called when a script that created the run finishes or on the destruction - of Neptune context. - - When using Neptune with Jupyter notebooks it's a good practice to stop the tracked run manually as it - will be stopped automatically only when the Jupyter kernel stops. - - Args: - seconds (int or float, optional): Seconds to wait for all tracking calls to finish - before stopping the tracked run. - If `None` will wait for all tracking calls to finish. Defaults to `True`. - - Examples: - If you are creating tracked runs from the script you don't need to call `.stop()`: - - >>> import neptune.new as neptune - >>> run = neptune.init() - - >>> # Your training or monitoring code - ... pass - ... # If you are executing Python script .stop() - ... # is automatically called at the end for every run - - If you are performing multiple training jobs from one script one after the other it is a good practice - to `.stop()` the finished tracked runs as every open run keeps an open connection with Neptune, - monitors hardware usage, etc. 
You can also use Context Managers - Neptune will automatically call `.stop()` - on the destruction of Run context: - - >>> import neptune.new as neptune - - >>> # If you are running consecutive training jobs from the same script - ... # stop the tracked runs manually at the end of single training job - ... for config in configs: - ... run = neptune.init() - ... # Your training or monitoring code - ... pass - ... run.stop() - - >>> # You can also use with statement and context manager - ... for config in configs: - ... with neptune.init() as run: - ... # Your training or monitoring code - ... pass - ... # .stop() is automatically called - ... # when code execution exits the with statement - - .. warning:: - If you are using Jupyter notebooks for creating your runs you need to manually invoke `.stop()` once the - training and evaluation is done. - - You may also want to check `stop docs page`_. - - .. _stop docs page: - https://docs.neptune.ai/api-reference/run#.stop - """ - return AttributeContainer.stop(self, seconds=seconds) - - def get_structure(self) -> Dict[str, Any]: - """Returns a run's metadata structure in form of a dictionary. - - This method can be used to traverse the run's metadata structure programmatically - when using Neptune in automated workflows. - - .. danger:: - The returned object is a deep copy of an internal run's structure. - - Returns: - ``dict``: with the run's metadata structure. - - """ - return AttributeContainer.get_structure(self) - - def print_structure(self) -> None: - """Pretty prints the structure of the run's metadata. - - Paths are ordered lexicographically and the whole structure is neatly colored. - """ - return AttributeContainer.print_structure(self) - - def pop(self, path: str, wait: bool = False) -> None: - """Removes the field stored under the path completely and all data associated with it. - - Args: - path (str): Path of the field to be removed. 
- wait (bool, optional): If `True` the client will first wait to send all tracked metadata to the server. - This makes the call synchronous. Defaults to `True`. - - Examples: - >>> import neptune.new as neptune - >>> run = neptune.init() - - >>> run['parameters/learninggg_rata'] = 0.3 - - >>> # Delete a field along with it's data - ... run.pop('parameters/learninggg_rata') - - >>> run['parameters/learning_rate'] = 0.3 - - >>> # Training finished - ... run['trained_model'].upload('model.pt') - >>> # 'model_checkpoint' is a File field - ... run.pop('model_checkpoint') - - You may also want to check `pop docs page`_. - - .. _pop docs page: - https://docs.neptune.ai/api-reference/run#.pop - """ - return AttributeContainer.pop(self, path=path, wait=wait) - - def wait(self, disk_only=False) -> None: - """Wait for all the tracking calls to finish. - - Args: - disk_only (bool, optional, default is False): If `True` the process will only wait for data to be saved - locally from memory, but will not wait for them to reach Neptune servers. - Defaults to `False`. - - You may also want to check `wait docs page`_. - - .. _wait docs page: - https://docs.neptune.ai/api-reference/run#.wait - """ - return AttributeContainer.wait(self, disk_only=disk_only) - - def sync(self, wait: bool = True) -> None: - """Synchronizes local representation of the run with Neptune servers. - - Args: - wait (bool, optional, default is True): If `True` the process will only wait for data to be saved - locally from memory, but will not wait for them to reach Neptune servers. - Defaults to `True`. - - Examples: - >>> import neptune.new as neptune - - >>> # Connect to a run from Worker #3 - ... worker_id = 3 - >>> run = neptune.init(run='DIST-43', monitoring_namespace='monitoring/{}'.format(worker_id)) - - >>> # Try to access logs that were created in meantime by Worker #2 - ... 
worker_2_status = run['status/2'].fetch() # Error if this field was created after this script starts - - >>> run.sync() # Synchronizes local representation with Neptune servers. - >>> worker_2_status = run['status/2'].fetch() # No error - - You may also want to check `sync docs page`_. - - .. _sync docs page: - https://docs.neptune.ai/api-reference/run#.sync - """ - return AttributeContainer.sync(self, wait=wait) diff --git a/neptune/new/runs_table.py b/neptune/new/runs_table.py index b70ca30b9..13f3557a8 100644 --- a/neptune/new/runs_table.py +++ b/neptune/new/runs_table.py @@ -13,212 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import logging -from datetime import datetime -from typing import List, Dict, Optional, Union +# backwards compatibility +# pylint: disable=unused-import,wrong-import-order +from neptune.new.internal.backends.api_model import AttributeType +from neptune.new.metadata_containers.metadata_containers_table import ( + LeaderboardEntry, + LeaderboardHandler, +) from neptune.new.exceptions import MetadataInconsistency + +from neptune.new.metadata_containers.metadata_containers_table import ( + Table as RunsTable, + TableEntry as RunsTableEntry, +) from neptune.new.internal.backends.api_model import ( - LeaderboardEntry, AttributeWithProperties, - AttributeType, ) from neptune.new.internal.backends.neptune_backend import NeptuneBackend from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.utils.paths import join_paths, parse_path - -logger = logging.getLogger(__name__) - - -class RunsTableEntry: - def __init__( - self, - backend: NeptuneBackend, - _id: str, - attributes: List[AttributeWithProperties], - ): - self._backend = backend - self._id = _id - self._attributes = attributes - - def __getitem__(self, path: str) -> "LeaderboardHandler": - return LeaderboardHandler(self, path) - - def get_attribute_type(self, path: str) -> AttributeType: - 
for attr in self._attributes: - if attr.path == path: - return attr.type - raise ValueError("Could not find {} attribute".format(path)) - - def get_attribute_value(self, path: str): - for attr in self._attributes: - if attr.path == path: - _type = attr.type - if _type == AttributeType.RUN_STATE: - return attr.properties.value - if _type in ( - AttributeType.FLOAT, - AttributeType.INT, - AttributeType.BOOL, - AttributeType.STRING, - AttributeType.DATETIME, - ): - return attr.properties.value - if ( - _type == AttributeType.FLOAT_SERIES - or _type == AttributeType.STRING_SERIES - ): - return attr.properties.last - if _type == AttributeType.IMAGE_SERIES: - raise MetadataInconsistency("Cannot get value for image series.") - if _type == AttributeType.FILE: - raise MetadataInconsistency( - "Cannot get value for file attribute. Use download() instead." - ) - if _type == AttributeType.FILE_SET: - raise MetadataInconsistency( - "Cannot get value for file set attribute. Use download() instead." - ) - if _type == AttributeType.STRING_SET: - return set(attr.properties.values) - if _type == AttributeType.GIT_REF: - return attr.properties.commit.commitId - if _type == AttributeType.NOTEBOOK_REF: - return attr.properties.notebookName - if _type == AttributeType.ARTIFACT: - return attr.properties.hash - logger.error( - "Attribute type %s not supported in this version, yielding None. 
Recommended client upgrade.", - _type, - ) - return None - raise ValueError("Could not find {} attribute".format(path)) - - def download_file_attribute(self, path: str, destination: Optional[str]): - for attr in self._attributes: - if attr.path == path: - _type = attr.type - if _type == AttributeType.FILE: - self._backend.download_file( - self._id, ContainerType.RUN, parse_path(path), destination - ) - return - raise MetadataInconsistency( - "Cannot download file from attribute of type {}".format(_type) - ) - raise ValueError("Could not find {} attribute".format(path)) - - def download_file_set_attribute(self, path: str, destination: Optional[str]): - for attr in self._attributes: - if attr.path == path: - _type = attr.type - if _type == AttributeType.FILE_SET: - self._backend.download_file_set( - self._id, ContainerType.RUN, parse_path(path), destination - ) - return - raise MetadataInconsistency( - "Cannot download ZIP archive from attribute of type {}".format( - _type - ) - ) - raise ValueError("Could not find {} attribute".format(path)) - - -class LeaderboardHandler: - def __init__(self, run: RunsTableEntry, path: str): - self._run = run - self._path = path - - def __getitem__(self, path: str) -> "LeaderboardHandler": - return LeaderboardHandler(self._run, join_paths(self._path, path)) - - def get(self): - return self._run.get_attribute_value(self._path) - - def download(self, destination: Optional[str]): - attr_type = self._run.get_attribute_type(self._path) - if attr_type == AttributeType.FILE: - return self._run.download_file_attribute(self._path, destination) - elif attr_type == AttributeType.FILE_SET: - return self._run.download_file_set_attribute(self._path, destination) - raise MetadataInconsistency( - "Cannot download file from attribute of type {}".format(attr_type) - ) - - -class RunsTable: - def __init__(self, backend: NeptuneBackend, entries: List[LeaderboardEntry]): - self._backend = backend - self._entries = entries - - def to_runs(self) -> 
List[RunsTableEntry]: - return [ - RunsTableEntry(self._backend, e.id, e.attributes) for e in self._entries - ] - - def to_pandas(self): - # pylint:disable=import-outside-toplevel - import pandas as pd - - def make_attribute_value( - attribute: AttributeWithProperties, - ) -> Optional[Union[str, float, datetime]]: - _type = attribute.type - _properties = attribute.properties - if _type == AttributeType.RUN_STATE: - return _properties.value - if _type in ( - AttributeType.FLOAT, - AttributeType.INT, - AttributeType.BOOL, - AttributeType.STRING, - AttributeType.DATETIME, - ): - return _properties.value - if ( - _type == AttributeType.FLOAT_SERIES - or _type == AttributeType.STRING_SERIES - ): - return _properties.last - if _type == AttributeType.IMAGE_SERIES: - return None - if _type == AttributeType.FILE or _type == AttributeType.FILE_SET: - return None - if _type == AttributeType.STRING_SET: - return ",".join(_properties.values) - if _type == AttributeType.GIT_REF: - return _properties.commit.commitId - if _type == AttributeType.NOTEBOOK_REF: - return _properties.notebookName - if _type == AttributeType.ARTIFACT: - return _properties.hash - logger.error( - "Attribute type %s not supported in this version, yielding None. 
Recommended client upgrade.", - _type, - ) - return None - - def make_row( - entry: LeaderboardEntry, - ) -> Dict[str, Optional[Union[str, float, datetime]]]: - row: Dict[str, Union[str, float, datetime]] = dict() - for attr in entry.attributes: - value = make_attribute_value(attr) - if value is not None: - row[attr.path] = value - return row - - def sort_key(attr): - domain = attr.split("/")[0] - if domain == "sys": - return 0, attr - if domain == "monitoring": - return 2, attr - return 1, attr - - rows = dict((n, make_row(entry)) for (n, entry) in enumerate(self._entries)) - - df = pd.DataFrame.from_dict(data=rows, orient="index") - df = df.reindex(sorted(df.columns, key=sort_key), axis="columns") - return df diff --git a/neptune/new/sync.py b/neptune/new/sync.py deleted file mode 100644 index cfb6c92b9..000000000 --- a/neptune/new/sync.py +++ /dev/null @@ -1,532 +0,0 @@ -# -# Copyright (c) 2020, Neptune Labs Sp. z o.o. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import logging -import os -import sys -import textwrap -import threading -import time -import uuid -from pathlib import Path -from typing import Any, Iterable, List, Optional, Sequence, Tuple - -import click - -from neptune.new.constants import ( - ASYNC_DIRECTORY, - NEPTUNE_DATA_DIRECTORY, - OFFLINE_DIRECTORY, - OFFLINE_NAME_PREFIX, -) -from neptune.new.envs import NEPTUNE_SYNC_BATCH_TIMEOUT_ENV, PROJECT_ENV_NAME -from neptune.new.exceptions import ( - CannotSynchronizeOfflineRunsWithoutProject, - NeptuneConnectionLostException, - NeptuneException, - ProjectNotFound, - RunNotFound, -) -from neptune.new.internal.backends.api_model import ApiRun, Project -from neptune.new.internal.backends.hosted_neptune_backend import HostedNeptuneBackend -from neptune.new.internal.backends.neptune_backend import NeptuneBackend -from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.containers.disk_queue import DiskQueue -from neptune.new.internal.credentials import Credentials -from neptune.new.internal.operation import Operation -from neptune.new.internal.utils.container_type_file import ContainerTypeFile - -####################################################################################################################### -# Run and Project utilities -####################################################################################################################### - - -# Set in CLI entry points block, patched in tests -backend: NeptuneBackend = None - -retries_timeout = int(os.getenv(NEPTUNE_SYNC_BATCH_TIMEOUT_ENV, "3600")) - - -def get_run(run_id: str) -> Optional[ApiRun]: - try: - return backend.get_run(run_id) - except RunNotFound: - return None - except NeptuneException as e: - click.echo( - "Exception while fetching run {}. Skipping run.".format(run_id), err=True - ) - logging.exception(e) - return None - - -project_name_missing_message = ( - "Project name not provided. Could not synchronize offline runs. 
" - "To synchronize offline run, specify the project name with the --project flag " - "or by setting the {} environment variable.".format(PROJECT_ENV_NAME) -) - - -def project_not_found_message(project_name: str) -> str: - return ( - "Project {} not found. Could not synchronize offline runs. ".format( - project_name - ) - + "Please ensure you specified the correct project name with the --project flag " - + "or with the {} environment variable, or contact Neptune for support.".format( - PROJECT_ENV_NAME - ) - ) - - -def get_project(project_name_flag: Optional[str]) -> Optional[Project]: - project_name = project_name_flag or os.getenv(PROJECT_ENV_NAME) - if not project_name: - click.echo(textwrap.fill(project_name_missing_message), file=sys.stderr) - return None - try: - return backend.get_project(project_name) - except ProjectNotFound: - click.echo( - textwrap.fill(project_not_found_message(project_name)), file=sys.stderr - ) - return None - - -def get_qualified_name(run: ApiRun) -> str: - return "{}/{}/{}".format(run.workspace, run.project_name, run.short_id) - - -def is_valid_uuid(val: Any) -> bool: - try: - uuid.UUID(val) - return True - except ValueError: - return False - - -####################################################################################################################### -# Listing runs to be synchronized -####################################################################################################################### - - -def is_run_synced(run_path: Path) -> bool: - return all( - is_execution_synced(execution_path) for execution_path in run_path.iterdir() - ) - - -def is_execution_synced(execution_path: Path) -> bool: - disk_queue = DiskQueue( - execution_path, lambda x: x.to_dict(), Operation.from_dict, threading.RLock() - ) - return disk_queue.is_empty() - - -def get_offline_runs_ids(base_path: Path) -> List[str]: - result = [] - if not (base_path / OFFLINE_DIRECTORY).is_dir(): - return [] - for run_path in (base_path / 
OFFLINE_DIRECTORY).iterdir(): - result.append(run_path.name) - return result - - -def partition_runs(base_path: Path) -> Tuple[List[ApiRun], List[ApiRun], int]: - synced_runs_ids = [] - unsynced_runs_ids = [] - async_path = base_path / ASYNC_DIRECTORY - if not async_path.is_dir(): - return [], [], 0 - - for run_path in async_path.iterdir(): - run_id = run_path.name - if is_run_synced(run_path): - synced_runs_ids.append(run_id) - else: - unsynced_runs_ids.append(run_id) - synced_runs = [run for run in map(get_run, synced_runs_ids)] - unsynced_runs = [run for run in map(get_run, unsynced_runs_ids)] - not_found = len( - [exp for exp in synced_runs + unsynced_runs if not exp or exp.trashed] - ) - synced_runs = [exp for exp in synced_runs if exp and not exp.trashed] - unsynced_runs = [exp for exp in unsynced_runs if exp and not exp.trashed] - - return synced_runs, unsynced_runs, not_found - - -offline_run_explainer = """ -Runs which execute offline are not created on the server and they are not assigned to projects; -instead, they are identified by UUIDs like the ones above. -When synchronizing offline runs, please specify the workspace and project using the "--project" -flag. Alternatively, you can set the environment variable -{} to the target workspace/project. See the examples below. 
-""".format( - PROJECT_ENV_NAME -) - - -def list_runs( - base_path: Path, - synced_runs: Sequence[ApiRun], - unsynced_runs: Sequence[ApiRun], - offline_runs_ids: Sequence[str], -) -> None: - if not synced_runs and not unsynced_runs and not offline_runs_ids: - click.echo("There are no Neptune runs in {}".format(base_path)) - sys.exit(1) - - if unsynced_runs: - click.echo("Unsynchronized runs:") - for run in unsynced_runs: - click.echo("- {}".format(get_qualified_name(run))) - - if synced_runs: - click.echo("Synchronized runs:") - for run in synced_runs: - click.echo("- {}".format(get_qualified_name(run))) - - if offline_runs_ids: - click.echo("Unsynchronized offline runs:") - for run_id in offline_runs_ids: - click.echo("- {}{}".format(OFFLINE_NAME_PREFIX, run_id)) - click.echo() - click.echo(textwrap.fill(offline_run_explainer, width=90)) - - if not unsynced_runs: - click.echo() - click.echo("There are no unsynchronized runs in {}".format(base_path)) - - if not synced_runs: - click.echo() - click.echo("There are no synchronized runs in {}".format(base_path)) - - click.echo() - click.echo("Please run with the `neptune sync --help` to see example commands.") - - -def synchronization_status(base_path: Path) -> None: - synced_runs, unsynced_runs, not_found = partition_runs(base_path) - if not_found > 0: - click.echo( - "WARNING: {} runs was skipped because they are in trash or do not exist anymore.".format( - not_found - ), - sys.stderr, - ) - offline_runs_ids = get_offline_runs_ids(base_path) - list_runs(base_path, synced_runs, unsynced_runs, offline_runs_ids) - - -####################################################################################################################### -# Run synchronization -####################################################################################################################### - - -def sync_run(run_path: Path, qualified_run_name: str) -> None: - run_id = run_path.name - click.echo("Synchronising 
{}".format(qualified_run_name)) - for execution_path in run_path.iterdir(): - container_type = ContainerTypeFile(execution_path).container_type - sync_execution(execution_path, run_id, container_type) - click.echo( - f"Synchronization of {container_type.value} {qualified_run_name} completed." - ) - - -def sync_execution( - execution_path: Path, container_id: str, container_type: ContainerType -) -> None: - disk_queue = DiskQueue( - execution_path, - lambda x: x.to_dict(), - Operation.from_dict, - threading.RLock(), - container_type, - ) - while True: - batch, version = disk_queue.get_batch(1000) - if not batch: - break - - start_time = time.monotonic() - expected_count = len(batch) - version_to_ack = version - expected_count - while True: - try: - processed_count, _ = backend.execute_operations( - container_id, - container_type, - operations=batch, - ) - version_to_ack += processed_count - batch = batch[processed_count:] - disk_queue.ack(version) - if version_to_ack == version: - break - except NeptuneConnectionLostException as ex: - if time.monotonic() - start_time > retries_timeout: - raise ex - click.echo( - "Experiencing connection interruptions. " - "Will try to reestablish communication with Neptune. 
" - f"Internal exception was: {ex.cause.__class__.__name__}", - sys.stderr, - ) - - -def sync_all_registered_runs(base_path: Path) -> None: - async_path = base_path / ASYNC_DIRECTORY - if not async_path.is_dir(): - return - - for run_path in async_path.iterdir(): - if not is_run_synced(run_path): - run_id = run_path.name - run = get_run(run_id) - if run: - sync_run(run_path, get_qualified_name(run)) - - -def sync_selected_registered_runs( - base_path: Path, qualified_runs_names: Sequence[str] -) -> None: - for name in qualified_runs_names: - run = get_run(name) - if run: - run_path = base_path / ASYNC_DIRECTORY / str(run.id) - if run_path.exists(): - sync_run(run_path, name) - else: - click.echo( - "Warning: Run '{}' does not exist in location {}".format( - name, base_path - ), - file=sys.stderr, - ) - - -def register_offline_run( - project: Project, container_type: ContainerType -) -> Optional[Tuple[ApiRun, bool]]: - try: - if container_type == ContainerType.RUN: - return backend.create_run(project.id), True - else: - # No need for registering project. - # Project must've been registered before. - return backend.get_run(project.id), False - except Exception as e: - click.echo( - "Exception occurred while trying to create a run " - "on the Neptune server. 
Please try again later", - file=sys.stderr, - ) - logging.exception(e) - return None - - -def move_offline_run(base_path: Path, offline_id: str, server_id: str) -> None: - # create async directory for run - (base_path / ASYNC_DIRECTORY / server_id).mkdir(parents=True) - # mv offline directory inside async one - (base_path / OFFLINE_DIRECTORY / offline_id).rename( - base_path / ASYNC_DIRECTORY / server_id / "exec-0-offline" - ) - - -def register_offline_runs( - base_path: Path, project: Project, offline_runs_ids: Iterable[str] -) -> List[ApiRun]: - result = [] - for run_id in offline_runs_ids: - dir_path = base_path / OFFLINE_DIRECTORY / run_id - if dir_path.is_dir(): - container_type = ContainerTypeFile(dir_path).container_type - run, registered = register_offline_run( - project, container_type=container_type - ) - if run: - move_offline_run(base_path, offline_id=run_id, server_id=run.id) - verb = "registered as" if registered else "recognized as" - click.echo( - f"Offline {container_type.value} {run_id} {verb} {get_qualified_name(run)}" - ) - result.append(run) - else: - click.echo( - f"Offline {container_type.value} with UUID {run_id} not found on disk.", - err=True, - ) - return result - - -def is_offline_run_name(name: str) -> bool: - return name.startswith(OFFLINE_NAME_PREFIX) and is_valid_uuid( - name[len(OFFLINE_NAME_PREFIX) :] - ) - - -def sync_offline_runs( - base_path: Path, project_name: Optional[str], offline_run_ids: Sequence[str] -): - if offline_run_ids: - project = get_project(project_name) - if not project: - raise CannotSynchronizeOfflineRunsWithoutProject - registered_runs = register_offline_runs(base_path, project, offline_run_ids) - offline_runs_names = [get_qualified_name(exp) for exp in registered_runs] - sync_selected_registered_runs(base_path, offline_runs_names) - - -def sync_selected_runs( - base_path: Path, project_name: Optional[str], runs_names: Sequence[str] -) -> None: - other_runs_names = [name for name in runs_names if not 
is_offline_run_name(name)] - sync_selected_registered_runs(base_path, other_runs_names) - - offline_runs_ids = [ - name[len(OFFLINE_NAME_PREFIX) :] - for name in runs_names - if is_offline_run_name(name) - ] - sync_offline_runs(base_path, project_name, offline_runs_ids) - - -def sync_all_runs(base_path: Path, project_name: Optional[str]) -> None: - sync_all_registered_runs(base_path) - - offline_runs_ids = get_offline_runs_ids(base_path) - sync_offline_runs(base_path, project_name, offline_runs_ids) - - -####################################################################################################################### -# Entrypoint for the CLI utility -####################################################################################################################### - - -# pylint: disable=unused-argument -def get_neptune_path(ctx, param, path: str) -> Path: - # check if path exists and contains a '.neptune' folder - path = Path(path) - if (path / NEPTUNE_DATA_DIRECTORY).is_dir(): - return path / NEPTUNE_DATA_DIRECTORY - elif path.name == NEPTUNE_DATA_DIRECTORY and path.is_dir(): - return path - else: - raise click.BadParameter( - "Path {} does not contain a '{}' folder.".format( - path, NEPTUNE_DATA_DIRECTORY - ) - ) - - -path_option = click.option( - "--path", - type=click.Path(exists=True, file_okay=False, resolve_path=True), - default=Path.cwd(), - callback=get_neptune_path, - metavar="", - help="path to a directory containing a '.neptune' folder with stored runs", -) - - -@click.command() -@path_option -def status(path: Path) -> None: - """List synchronized and unsynchronized runs in the given directory. Trashed runs are not listed. - - Neptune stores run data on disk in '.neptune' directories. In case a run executes offline - or network is unavailable as the run executes, run data can be synchronized - with the server with this command line utility. 
- - Examples: - - \b - # List synchronized and unsynchronized runs in the current directory - neptune status - - \b - # List synchronized and unsynchronized runs in directory "foo/bar" without actually syncing - neptune status --path foo/bar - """ - - # pylint: disable=global-statement - global backend - backend = HostedNeptuneBackend(Credentials.from_token()) - - synchronization_status(path) - - -@click.command() -@path_option -@click.option( - "--run", - "runs_names", - multiple=True, - metavar="", - help="run name (workspace/project/short-id or UUID for offline runs) to synchronize.", -) -@click.option( - "-p", - "--project", - "project_name", - multiple=False, - metavar="project-name", - help="project name (workspace/project) where offline runs will be sent", -) -def sync(path: Path, runs_names: List[str], project_name: Optional[str]): - """Synchronizes runs with unsent data with the server. - - Neptune stores run data on disk in '.neptune' directories. In case a run executes offline - or network is unavailable as the run executes, run data can be synchronized - with the server with this command line utility. 
- - You can list unsynchronized runs with `neptune status` - - Examples: - - \b - # Synchronize all runs in the current directory - neptune sync - - \b - # Synchronize all runs in the given path - neptune sync --path foo/bar - - \b - # Synchronize only runs "NPT-42" and "NPT-43" in "workspace/project" in the current directory - neptune sync --run workspace/project/NPT-42 --run workspace/project/NPT-43 - - \b - # Synchronise all runs in the current directory, sending offline runs to project "workspace/project" - neptune sync --project workspace/project - - \b - # Synchronize only the offline run with UUID offline/a1561719-b425-4000-a65a-b5efb044d6bb - # to project "workspace/project" - neptune sync --project workspace/project --run offline/a1561719-b425-4000-a65a-b5efb044d6bb - """ - - # pylint: disable=global-statement - global backend - backend = HostedNeptuneBackend(Credentials.from_token()) - - if runs_names: - sync_selected_runs(path, project_name, runs_names) - else: - sync_all_runs(path, project_name) diff --git a/neptune/new/sync/__init__.py b/neptune/new/sync/__init__.py new file mode 100644 index 000000000..d3c65adc9 --- /dev/null +++ b/neptune/new/sync/__init__.py @@ -0,0 +1,176 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +__all__ = [] + +from pathlib import Path +from typing import List, Optional + +import click + +from neptune.new.constants import ( + NEPTUNE_DATA_DIRECTORY, +) +from neptune.new.internal.backends.hosted_neptune_backend import HostedNeptuneBackend +from neptune.new.internal.credentials import Credentials + +from neptune.new.sync.status import StatusRunner +from neptune.new.sync.sync import SyncRunner + + +# backwards compatibility +# pylint: disable=unused-import,wrong-import-order + +from neptune.new.internal.backends.neptune_backend import NeptuneBackend +from neptune.new.internal.disk_queue import DiskQueue +from neptune.new.internal.operation import Operation +from neptune.new.internal.backends.api_model import ApiExperiment, Project +from neptune.new.exceptions import ( + CannotSynchronizeOfflineRunsWithoutProject, + NeptuneConnectionLostException, + NeptuneException, + ProjectNotFound, + RunNotFound, +) + + +# pylint: disable=unused-argument +def get_neptune_path(ctx, param, path: str) -> Path: + # check if path exists and contains a '.neptune' folder + path = Path(path) + if (path / NEPTUNE_DATA_DIRECTORY).is_dir(): + return path / NEPTUNE_DATA_DIRECTORY + elif path.name == NEPTUNE_DATA_DIRECTORY and path.is_dir(): + return path + else: + raise click.BadParameter( + "Path {} does not contain a '{}' folder.".format( + path, NEPTUNE_DATA_DIRECTORY + ) + ) + + +path_option = click.option( + "--path", + type=click.Path(exists=True, file_okay=False, resolve_path=True), + default=Path.cwd(), + callback=get_neptune_path, + metavar="", + help="path to a directory containing a '.neptune' folder with stored objects", +) + + +@click.command() +@path_option +def status(path: Path) -> None: + """List synchronized and unsynchronized objects in the given directory. Trashed objects are not listed. + + Neptune stores object data on disk in '.neptune' directories. 
In case an object executes offline + or network is unavailable as the object executes, object data can be synchronized + with the server with this command line utility. + + Examples: + + \b + # List synchronized and unsynchronized objects in the current directory + neptune status + + \b + # List synchronized and unsynchronized objects in directory "foo/bar" without actually syncing + neptune status --path foo/bar + """ + + status_runner = StatusRunner(backend=HostedNeptuneBackend(Credentials.from_token())) + + status_runner.synchronization_status(path) + + +@click.command() +@path_option +@click.option( + "--run", + "runs_names", + multiple=True, + metavar="", + help="[deprecated] run name (workspace/project/short-id or UUID for offline runs) to synchronize.", +) +@click.option( + "--object", + "object_names", + multiple=True, + metavar="", + help="object name (workspace/project/short-id or UUID for offline runs) to synchronize.", +) +@click.option( + "-p", + "--project", + "project_name", + multiple=False, + metavar="project-name", + help="project name (workspace/project) where offline runs will be sent", +) +def sync( + path: Path, + runs_names: List[str], + object_names: List[str], + project_name: Optional[str], +): + """Synchronizes objects with unsent data with the server. + + Neptune stores object data on disk in '.neptune' directories. In case an object executes offline + or network is unavailable as the object executes, object data can be synchronized + with the server with this command line utility.
+ + You can list unsynchronized objects with `neptune status` + + Examples: + + \b + # Synchronize all objects in the current directory + neptune sync + + \b + # Synchronize all objects in the given path + neptune sync --path foo/bar + + \b + # Synchronize only runs "NPT-42" and "NPT-43" in "workspace/project" in the current directory + neptune sync --object workspace/project/NPT-42 --object workspace/project/NPT-43 + + \b + # Synchronize all objects in the current directory, sending offline runs to project "workspace/project" + neptune sync --project workspace/project + + \b + # Synchronize only the offline run with UUID offline/a1561719-b425-4000-a65a-b5efb044d6bb + # to project "workspace/project" + neptune sync --project workspace/project --object offline/a1561719-b425-4000-a65a-b5efb044d6bb + """ + + sync_runner = SyncRunner(backend=HostedNeptuneBackend(Credentials.from_token())) + + if runs_names: + click.echo( + "WARNING: --run parameter is deprecated and will be removed in the future, please start using --object" + ) + # merge the deprecated --run values into the --object values (the set drops duplicates) + object_names = set(object_names) + object_names.update(runs_names) + + if object_names: + sync_runner.sync_selected_containers(path, project_name, object_names) + else: + sync_runner.sync_all_containers(path, project_name) diff --git a/neptune/new/sync/abstract_backend_runner.py b/neptune/new/sync/abstract_backend_runner.py new file mode 100644 index 000000000..331877358 --- /dev/null +++ b/neptune/new/sync/abstract_backend_runner.py @@ -0,0 +1,28 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +__all__ = ["AbstractBackendRunner"] + +import abc + +from neptune.new.internal.backends.neptune_backend import NeptuneBackend + + +class AbstractBackendRunner(abc.ABC): + _backend: NeptuneBackend + + def __init__(self, backend: NeptuneBackend): + self._backend = backend diff --git a/neptune/new/sync/status.py b/neptune/new/sync/status.py new file mode 100644 index 000000000..ae070b865 --- /dev/null +++ b/neptune/new/sync/status.py @@ -0,0 +1,139 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +__all__ = ["StatusRunner"] + +import sys +import textwrap +from pathlib import Path +from typing import List, Sequence, Tuple + +import click + +from neptune.new.constants import ( + ASYNC_DIRECTORY, + OFFLINE_NAME_PREFIX, +) +from neptune.new.envs import PROJECT_ENV_NAME +from neptune.new.internal.backends.api_model import ApiExperiment +from neptune.new.sync.abstract_backend_runner import AbstractBackendRunner +from neptune.new.sync.utils import ( + get_qualified_name, + is_container_synced, + iterate_containers, + get_metadata_container, + get_offline_dirs, +) + +offline_run_explainer = """ +Runs which execute offline are not created on the server and they are not assigned to projects; +instead, they are identified by UUIDs like the ones above. +When synchronizing offline runs, please specify the workspace and project using the "--project" +flag. Alternatively, you can set the environment variable +{} to the target workspace/project. See the examples below. +""".format( + PROJECT_ENV_NAME +) + + +class StatusRunner(AbstractBackendRunner): + def partition_containers( + self, + base_path: Path, + ) -> Tuple[List[ApiExperiment], List[ApiExperiment], int]: + synced_containers = [] + unsynced_containers = [] + async_path = base_path / ASYNC_DIRECTORY + for container_type, container_id, path in iterate_containers(async_path): + metadata_container = get_metadata_container( + backend=self._backend, + container_id=container_id, + container_type=container_type, + ) + + if is_container_synced(path): + synced_containers.append(metadata_container) + else: + unsynced_containers.append(metadata_container) + + not_found = len( + [ + exp + for exp in synced_containers + unsynced_containers + if not exp or exp.trashed + ] + ) + synced_containers = [ + obj for obj in synced_containers if obj and not obj.trashed + ] + unsynced_containers = [ + obj for obj in unsynced_containers if obj and not obj.trashed + ] + + return synced_containers, unsynced_containers, not_found + + 
@staticmethod + def list_containers( + base_path: Path, + synced_containers: Sequence[ApiExperiment], + unsynced_containers: Sequence[ApiExperiment], + offline_dirs: Sequence[str], + ) -> None: + if not synced_containers and not unsynced_containers and not offline_dirs: + click.echo("There are no Neptune objects in {}".format(base_path)) + sys.exit(1) + + if unsynced_containers: + click.echo("Unsynchronized objects:") + for container in unsynced_containers: + click.echo("- {}".format(get_qualified_name(container))) + + if synced_containers: + click.echo("Synchronized objects:") + for container in synced_containers: + click.echo("- {}".format(get_qualified_name(container))) + + if offline_dirs: + click.echo("Unsynchronized offline objects:") + for run_id in offline_dirs: + click.echo("- {}{}".format(OFFLINE_NAME_PREFIX, run_id)) + click.echo() + click.echo(textwrap.fill(offline_run_explainer, width=90)) + + if not unsynced_containers: + click.echo() + click.echo("There are no unsynchronized objects in {}".format(base_path)) + + if not synced_containers: + click.echo() + click.echo("There are no synchronized objects in {}".format(base_path)) + + click.echo() + click.echo("Please run with the `neptune sync --help` to see example commands.") + + def synchronization_status(self, base_path: Path) -> None: + synced_containers, unsynced_containers, not_found = self.partition_containers( + base_path + ) + if not_found > 0: + click.echo( + f"WARNING: {not_found} objects was skipped because they are in trash or do not exist anymore.", + sys.stderr, + ) + offline_dirs = get_offline_dirs(base_path) + self.list_containers( + base_path, synced_containers, unsynced_containers, offline_dirs + ) diff --git a/neptune/new/sync/sync.py b/neptune/new/sync/sync.py new file mode 100644 index 000000000..bbe7d7f6f --- /dev/null +++ b/neptune/new/sync/sync.py @@ -0,0 +1,251 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +__all__ = ["SyncRunner"] + +import logging +import os +import sys +import threading +import time +from pathlib import Path +from typing import Iterable, List, Optional, Sequence + +import click + +from neptune.new.constants import ( + ASYNC_DIRECTORY, + OFFLINE_DIRECTORY, + OFFLINE_NAME_PREFIX, +) +from neptune.new.envs import NEPTUNE_SYNC_BATCH_TIMEOUT_ENV +from neptune.new.exceptions import ( + CannotSynchronizeOfflineRunsWithoutProject, + NeptuneConnectionLostException, +) +from neptune.new.internal.backends.api_model import ApiExperiment, Project +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.disk_queue import DiskQueue +from neptune.new.internal.id_formats import QualifiedName, UniqueId +from neptune.new.internal.operation import Operation +from neptune.new.sync.abstract_backend_runner import AbstractBackendRunner +from neptune.new.sync.utils import ( + get_project, + get_qualified_name, + is_container_synced, + iterate_containers, + get_metadata_container, + get_offline_dirs, + split_dir_name, + create_dir_name, +) + +retries_timeout = int(os.getenv(NEPTUNE_SYNC_BATCH_TIMEOUT_ENV, "3600")) + + +class SyncRunner(AbstractBackendRunner): + def sync_run(self, run_path: Path, run: ApiExperiment) -> None: + qualified_run_name = get_qualified_name(run) + click.echo("Synchronising {}".format(qualified_run_name)) + for execution_path in run_path.iterdir(): + 
self.sync_execution( + execution_path=execution_path, + container_id=run.id, + container_type=run.type, + ) + click.echo( + f"Synchronization of {run.type.value} {qualified_run_name} completed." + ) + + def sync_execution( + self, + execution_path: Path, + container_id: UniqueId, + container_type: ContainerType, + ) -> None: + disk_queue = DiskQueue( + dir_path=execution_path, + to_dict=lambda x: x.to_dict(), + from_dict=Operation.from_dict, + lock=threading.RLock(), + ) + while True: + batch, version = disk_queue.get_batch(1000) + if not batch: + break + + start_time = time.monotonic() + expected_count = len(batch) + version_to_ack = version - expected_count + while True: + try: + processed_count, _ = self._backend.execute_operations( + container_id=container_id, + container_type=container_type, + operations=batch, + ) + version_to_ack += processed_count + batch = batch[processed_count:] + disk_queue.ack(version) + if version_to_ack == version: + break + except NeptuneConnectionLostException as ex: + if time.monotonic() - start_time > retries_timeout: + raise ex + click.echo( + "Experiencing connection interruptions. " + "Will try to reestablish communication with Neptune. 
" + f"Internal exception was: {ex.cause.__class__.__name__}", + sys.stderr, + ) + + def sync_all_registered_containers(self, base_path: Path) -> None: + async_path = base_path / ASYNC_DIRECTORY + for container_type, unique_id, path in iterate_containers(async_path): + if not is_container_synced(path): + run = get_metadata_container( + backend=self._backend, + container_id=unique_id, + container_type=container_type, + ) + if run: + self.sync_run(run_path=path, run=run) + + def sync_selected_registered_containers( + self, base_path: Path, qualified_container_names: Sequence[QualifiedName] + ) -> None: + for name in qualified_container_names: + run = get_metadata_container( + backend=self._backend, + container_id=name, + ) + if run: + run_path = ( + base_path / ASYNC_DIRECTORY / f"{create_dir_name(run.type, run.id)}" + ) + run_path_deprecated = base_path / ASYNC_DIRECTORY / f"{run.id}" + if run_path.exists(): + self.sync_run(run_path=run_path, run=run) + elif run_path_deprecated.exists(): + self.sync_run(run_path=run_path_deprecated, run=run) + else: + click.echo( + "Warning: Run '{}' does not exist in location {}".format( + name, base_path + ), + file=sys.stderr, + ) + + def _register_offline_run( + self, project: Project, container_type: ContainerType + ) -> Optional[ApiExperiment]: + try: + if container_type == ContainerType.RUN: + return self._backend.create_run(project.id) + else: + raise ValueError("Only runs are supported in offline mode") + except Exception as e: + click.echo( + "Exception occurred while trying to create a run " + "on the Neptune server. 
Please try again later", + file=sys.stderr, + ) + logging.exception(e) + return None + + @staticmethod + def _move_offline_run( + base_path: Path, + offline_dir: str, + server_id: UniqueId, + server_type: ContainerType, + ) -> None: + online_dir = create_dir_name(container_type=server_type, container_id=server_id) + # create async directory for run + (base_path / ASYNC_DIRECTORY / online_dir).mkdir(parents=True) + # mv offline directory inside async one + (base_path / OFFLINE_DIRECTORY / offline_dir).rename( + base_path / ASYNC_DIRECTORY / online_dir / "exec-0-offline" + ) + + def register_offline_runs( + self, base_path: Path, project: Project, offline_dirs: Iterable[str] + ) -> List[ApiExperiment]: + result = [] + for offline_dir in offline_dirs: + offline_path = base_path / OFFLINE_DIRECTORY / offline_dir + if offline_path.is_dir(): + container_type, _ = split_dir_name(dir_name=offline_dir) + run = self._register_offline_run(project, container_type=container_type) + if run: + self._move_offline_run( + base_path=base_path, + offline_dir=offline_dir, + server_id=run.id, + server_type=run.type, + ) + click.echo( + f"Offline run {offline_dir} registered as {get_qualified_name(run)}" + ) + result.append(run) + else: + click.echo( + f"Offline run {offline_dir} not found on disk.", + err=True, + ) + return result + + def sync_offline_runs( + self, + base_path: Path, + project_name: Optional[QualifiedName], + offline_dirs: Sequence[UniqueId], + ): + if offline_dirs: + project = get_project(project_name, backend=self._backend) + if not project: + raise CannotSynchronizeOfflineRunsWithoutProject + registered_runs = self.register_offline_runs( + base_path, project, offline_dirs + ) + offline_runs_names = [get_qualified_name(exp) for exp in registered_runs] + self.sync_selected_registered_containers(base_path, offline_runs_names) + + def sync_selected_containers( + self, + base_path: Path, + project_name: Optional[str], + container_names: Sequence[str], + ) -> None: + 
non_offline_container_names = [ + QualifiedName(name) + for name in container_names + if not name.startswith(OFFLINE_NAME_PREFIX) + ] + self.sync_selected_registered_containers(base_path, non_offline_container_names) + + offline_dirs = [ + UniqueId(name[len(OFFLINE_NAME_PREFIX) :]) + for name in container_names + if name.startswith(OFFLINE_NAME_PREFIX) + ] + self.sync_offline_runs(base_path, project_name, offline_dirs) + + def sync_all_containers(self, base_path: Path, project_name: Optional[str]) -> None: + self.sync_all_registered_containers(base_path) + + offline_dirs = get_offline_dirs(base_path) + self.sync_offline_runs(base_path, project_name, offline_dirs) diff --git a/neptune/new/sync/utils.py b/neptune/new/sync/utils.py new file mode 100644 index 000000000..896bcf4d0 --- /dev/null +++ b/neptune/new/sync/utils.py @@ -0,0 +1,163 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +__all__ = [ + "get_metadata_container", + "get_project", + "get_qualified_name", + "is_container_synced", + "get_offline_dirs", + "iterate_containers", + "create_dir_name", + "split_dir_name", +] + +import logging +import os +import sys +import textwrap +import threading +from pathlib import Path +from typing import Optional, Iterator, Tuple, List, Union + +import click + +from neptune.new.constants import OFFLINE_DIRECTORY +from neptune.new.envs import PROJECT_ENV_NAME +from neptune.new.exceptions import ( + NeptuneException, + ProjectNotFound, + MetadataContainerNotFound, +) +from neptune.new.internal.backends.api_model import ApiExperiment, Project +from neptune.new.internal.backends.neptune_backend import NeptuneBackend +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.disk_queue import DiskQueue +from neptune.new.internal.id_formats import QualifiedName, UniqueId +from neptune.new.internal.operation import Operation + + +def get_metadata_container( + backend: NeptuneBackend, + container_id: Union[UniqueId, QualifiedName], + container_type: Optional[ContainerType] = None, +) -> Optional[ApiExperiment]: + public_container_type = container_type or "object" + try: + return backend.get_metadata_container( + container_id, expected_container_type=container_type + ) + except MetadataContainerNotFound: + click.echo(f"Can't fetch {public_container_type} {container_id}. Skipping.") + except NeptuneException as e: + click.echo( + f"Exception while fetching {public_container_type} {container_id}. Skipping.", + err=True, + ) + logging.exception(e) + + return None + + +_project_name_missing_message = ( + "Project name not provided. Could not synchronize offline runs. " + "To synchronize offline run, specify the project name with the --project flag " + "or by setting the {} environment variable.".format(PROJECT_ENV_NAME) +) + + +def _project_not_found_message(project_name: QualifiedName) -> str: + return ( + "Project {} not found. 
Could not synchronize offline runs. ".format( + project_name + ) + + "Please ensure you specified the correct project name with the --project flag " + + "or with the {} environment variable, or contact Neptune for support.".format( + PROJECT_ENV_NAME + ) + ) + + +def get_project( + project_name_flag: Optional[QualifiedName], backend: NeptuneBackend +) -> Optional[Project]: + project_name = project_name_flag or QualifiedName(os.getenv(PROJECT_ENV_NAME)) + if not project_name: + click.echo(textwrap.fill(_project_name_missing_message), file=sys.stderr) + return None + try: + return backend.get_project(project_name) + except ProjectNotFound: + click.echo( + textwrap.fill(_project_not_found_message(project_name)), file=sys.stderr + ) + return None + + +def get_qualified_name(run: ApiExperiment) -> QualifiedName: + return QualifiedName("{}/{}/{}".format(run.workspace, run.project_name, run.sys_id)) + + +def is_container_synced(run_path: Path) -> bool: + return all( + _is_execution_synced(execution_path) for execution_path in run_path.iterdir() + ) + + +def _is_execution_synced(execution_path: Path) -> bool: + disk_queue = DiskQueue( + execution_path, + lambda x: x.to_dict(), + Operation.from_dict, + threading.RLock(), + ) + return disk_queue.is_empty() + + +def create_dir_name(container_type: ContainerType, container_id: UniqueId) -> str: + return f"{container_type.value}__{container_id}" + + +def split_dir_name(dir_name: str) -> Tuple[ContainerType, UniqueId]: + parts = dir_name.split("__") + if len(parts) == 2: + return ContainerType(parts[0]), UniqueId(parts[1]) + elif len(parts) == 1: + return ContainerType.RUN, UniqueId(dir_name) + else: + raise ValueError(f"Wrong dir format: {dir_name}") + + +def iterate_containers( + base_path: Path, +) -> Iterator[Tuple[ContainerType, UniqueId, Path]]: + if not base_path.is_dir(): + return + + for path in base_path.iterdir(): + container_type, unique_id = split_dir_name(dir_name=path.name) + + yield container_type, unique_id, 
path + + +def get_offline_dirs(base_path: Path) -> List[UniqueId]: + result = [] + if not (base_path / OFFLINE_DIRECTORY).is_dir(): + return [] + for path_ in (base_path / OFFLINE_DIRECTORY).iterdir(): + dir_name = path_.name + result.append(UniqueId(dir_name)) + return result diff --git a/e2e_tests/integrations/common.py b/neptune/new/types/model_version_stage.py similarity index 75% rename from e2e_tests/integrations/common.py rename to neptune/new/types/model_version_stage.py index c299a6e97..055647de8 100644 --- a/e2e_tests/integrations/common.py +++ b/neptune/new/types/model_version_stage.py @@ -13,10 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from typing import Sequence +import enum -def does_series_converge(seq: Sequence): - # very naive implementation, but ok for us - third = int(len(seq) / 3) - return sum(seq[:third]) > sum(seq[-third:]) + +class ModelVersionStage(enum.Enum): + NONE = "none" + STAGING = "staging" + PRODUCTION = "production" + ARCHIVED = "archived" diff --git a/neptune/new/types/value_copy.py b/neptune/new/types/value_copy.py index 984b60e03..e8fe258a8 100644 --- a/neptune/new/types/value_copy.py +++ b/neptune/new/types/value_copy.py @@ -20,7 +20,7 @@ from neptune.new.types.value import Value if TYPE_CHECKING: - from neptune.new.handler import Handler + from neptune.new.metadata_containers import Handler from neptune.new.types.value_visitor import ValueVisitor Ret = TypeVar("Ret") @@ -37,7 +37,7 @@ def __init__(self, source_handler: "Handler"): def accept(self, visitor: "ValueVisitor[Ret]") -> Ret: # pylint: disable=protected-access source_path = self.source_handler._path - source_attr = self.source_handler._run.get_attribute(source_path) + source_attr = self.source_handler._container.get_attribute(source_path) if source_attr and source_attr.supports_copy: return visitor.copy_value( source_type=type(source_attr), source_path=parse_path(source_path) diff --git 
a/pylintrc b/pylintrc index d024a2685..f113715f4 100644 --- a/pylintrc +++ b/pylintrc @@ -22,7 +22,7 @@ persistent=yes load-plugins=pylintfileheader # File header -file-header=#\n# Copyright \(c\) (2019|2020|2021), Neptune Labs Sp\. z o\.o\.\n#\n# Licensed under the Apache License, Version 2\.0 \(the "License"\);\n# you may not use this file except in compliance with the License\.\n# You may obtain a copy of the License at\n#\n# http://www\.apache\.org/licenses/LICENSE-2\.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an "AS IS" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied\.\n# See the License for the specific language governing permissions and\n# limitations under the License\.\n#\n +file-header=#\n# Copyright \(c\) (2019|2020|2021|2022), Neptune Labs Sp\. z o\.o\.\n#\n# Licensed under the Apache License, Version 2\.0 \(the "License"\);\n# you may not use this file except in compliance with the License\.\n# You may obtain a copy of the License at\n#\n# http://www\.apache\.org/licenses/LICENSE-2\.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an "AS IS" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied\.\n# See the License for the specific language governing permissions and\n# limitations under the License\.\n#\n [MESSAGES CONTROL] diff --git a/tests/neptune/experiments_object_factory.py b/tests/neptune/experiments_object_factory.py index 75923ae28..5e2538914 100644 --- a/tests/neptune/experiments_object_factory.py +++ b/tests/neptune/experiments_object_factory.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019, Neptune Labs Sp. z o.o. +# Copyright (c) 2021, Neptune Labs Sp. z o.o. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/neptune/new/attributes/series/test_file_series.py b/tests/neptune/new/attributes/series/test_file_series.py index 4e1170014..1d84fd848 100644 --- a/tests/neptune/new/attributes/series/test_file_series.py +++ b/tests/neptune/new/attributes/series/test_file_series.py @@ -23,7 +23,7 @@ from neptune.new.types import File from neptune.new.attributes.series.file_series import FileSeries from tests.neptune.new.attributes.test_attribute_base import TestAttributeBase -from tests.neptune.new.helpers import create_file +from tests.neptune.new.utils.file_helpers import create_file @patch("time.time", new=TestAttributeBase._now) diff --git a/tests/neptune/new/attributes/test_attribute_base.py b/tests/neptune/new/attributes/test_attribute_base.py index f0e0fadbb..f73e89ae2 100644 --- a/tests/neptune/new/attributes/test_attribute_base.py +++ b/tests/neptune/new/attributes/test_attribute_base.py @@ -23,6 +23,7 @@ from mock import MagicMock from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.id_formats import UniqueId from neptune.new.internal.operation_processors.operation_processor import ( OperationProcessor, ) @@ -33,29 +34,30 @@ from neptune.new.internal.backends.neptune_backend_mock import NeptuneBackendMock -from neptune.new.run import Run +from neptune.new.metadata_containers import Run _now = time.time() class TestAttributeBase(unittest.TestCase): + # TODO: test Projects, Model and ModelVersion @staticmethod def _create_run(processor: Optional[OperationProcessor] = None): backend = NeptuneBackendMock() - exp = backend.create_run(str(uuid.uuid4())) + exp = backend.create_run(UniqueId(str(uuid.uuid4()))) if processor is None: processor = SyncOperationProcessor(exp.id, ContainerType.RUN, backend) _run = Run( - exp.id, - backend, - processor, - MagicMock(), - threading.RLock(), - MagicMock(), - MagicMock(), - MagicMock(), - MagicMock(), + id_=exp.id, + backend=backend, + op_processor=processor, + 
background_job=MagicMock(), + lock=threading.RLock(), + workspace=MagicMock(), + project_id=MagicMock(), + project_name=MagicMock(), + sys_id=MagicMock(), ) _run.sync() _run.start() diff --git a/tests/neptune/new/internal/containers/__init__.py b/tests/neptune/new/client/__init__.py similarity index 92% rename from tests/neptune/new/internal/containers/__init__.py rename to tests/neptune/new/client/__init__.py index 63b30720b..b5e585d90 100644 --- a/tests/neptune/new/internal/containers/__init__.py +++ b/tests/neptune/new/client/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# Copyright (c) 2022, Neptune Labs Sp. z o.o. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/neptune/new/client/abstract_experiment_test_mixin.py b/tests/neptune/new/client/abstract_experiment_test_mixin.py new file mode 100644 index 000000000..60a2907f8 --- /dev/null +++ b/tests/neptune/new/client/abstract_experiment_test_mixin.py @@ -0,0 +1,99 @@ +# +# Copyright (c) 2021, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pylint: disable=protected-access +import os +from abc import abstractmethod + +from neptune.new.exceptions import ( + MetadataInconsistency, + MissingFieldException, + NeptuneOfflineModeFetchException, + TypeDoesNotSupportAttributeException, +) + + +class AbstractExperimentTestMixin: + @staticmethod + @abstractmethod + def call_init(**kwargs): + pass + + def test_incorrect_mode(self): + with self.assertRaises(ValueError): + self.call_init(mode="srtgj") + + def test_debug_mode(self): + exp = self.call_init(mode="debug") + exp["some/variable"] = 13 + self.assertEqual(13, exp["some/variable"].fetch()) + self.assertNotIn(str(exp._id), os.listdir(".neptune")) + + def test_offline_mode(self): + exp = self.call_init(mode="offline") + exp["some/variable"] = 13 + with self.assertRaises(NeptuneOfflineModeFetchException): + exp["some/variable"].fetch() + + exp_dir = f"{exp.container_type.value}__{exp._id}" + self.assertIn(exp_dir, os.listdir(".neptune/offline")) + self.assertIn("data-1.log", os.listdir(f".neptune/offline/{exp_dir}")) + + def test_sync_mode(self): + exp = self.call_init(mode="sync") + exp["some/variable"] = 13 + exp["copied/variable"] = exp["some/variable"] + self.assertEqual(13, exp["some/variable"].fetch()) + self.assertEqual(13, exp["copied/variable"].fetch()) + self.assertNotIn(str(exp._id), os.listdir(".neptune")) + + def test_async_mode(self): + with self.call_init(mode="async", flush_period=0.5) as exp: + exp["some/variable"] = 13 + exp["copied/variable"] = exp["some/variable"] + with self.assertRaises(MetadataInconsistency): + exp["some/variable"].fetch() + exp.wait() + self.assertEqual(13, exp["some/variable"].fetch()) + self.assertEqual(13, exp["copied/variable"].fetch()) + + exp_dir = f"{exp.container_type.value}__{exp._id}" + self.assertIn(exp_dir, os.listdir(".neptune/async")) + execution_dir = os.listdir(f".neptune/async/{exp_dir}")[0] + self.assertIn( + "data-1.log", + os.listdir(f".neptune/async/{exp_dir}/{execution_dir}"), + ) + + def 
test_missing_attribute(self): + exp = self.call_init(mode="debug") + with self.assertRaises(MissingFieldException): + exp["non/existing/path"].fetch() + + def test_wrong_function(self): + exp = self.call_init(mode="debug") + with self.assertRaises(AttributeError): + exp["non/existing/path"].foo() + + def test_wrong_per_type_function(self): + exp = self.call_init(mode="debug") + exp["some/path"] = "foo" + with self.assertRaises(TypeDoesNotSupportAttributeException): + exp["some/path"].download() + + @abstractmethod + def test_read_only_mode(self): + pass diff --git a/tests/neptune/new/client/abstract_tables_test.py b/tests/neptune/new/client/abstract_tables_test.py new file mode 100644 index 000000000..06c4f110b --- /dev/null +++ b/tests/neptune/new/client/abstract_tables_test.py @@ -0,0 +1,197 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pylint: disable=protected-access +import os +import uuid +from abc import abstractmethod +from datetime import datetime + +from mock import Mock, patch + +from neptune.new import ANONYMOUS +from neptune.new.envs import API_TOKEN_ENV_NAME, PROJECT_ENV_NAME +from neptune.new.exceptions import ( + MetadataInconsistency, +) +from neptune.new.internal.backends.api_model import ( + Attribute, + AttributeType, + AttributeWithProperties, + LeaderboardEntry, +) +from neptune.new.internal.backends.neptune_backend_mock import NeptuneBackendMock + + +@patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", + new=lambda _, _uuid, _type: [Attribute(path="test", type=AttributeType.STRING)], +) +@patch("neptune.new.internal.backends.factory.HostedNeptuneBackend", NeptuneBackendMock) +class AbstractTablesTestMixin: + expected_container_type = None + + @abstractmethod + def get_table(self): + pass + + @abstractmethod + def get_table_entries(self, table): + pass + + @classmethod + def setUpClass(cls) -> None: + os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS + + @classmethod + def setUp(cls) -> None: + if PROJECT_ENV_NAME in os.environ: + del os.environ[PROJECT_ENV_NAME] + + @staticmethod + def build_attributes_leaderboard(now: datetime): + attributes = [] + attributes.append( + AttributeWithProperties( + "run/state", AttributeType.RUN_STATE, Mock(value="idle") + ) + ) + attributes.append( + AttributeWithProperties("float", AttributeType.FLOAT, Mock(value=12.5)) + ) + attributes.append( + AttributeWithProperties( + "string", AttributeType.STRING, Mock(value="some text") + ) + ) + attributes.append( + AttributeWithProperties("datetime", AttributeType.DATETIME, Mock(value=now)) + ) + attributes.append( + AttributeWithProperties( + "float/series", AttributeType.FLOAT_SERIES, Mock(last=8.7) + ) + ) + attributes.append( + AttributeWithProperties( + "string/series", AttributeType.STRING_SERIES, Mock(last="last text") + ) + ) + attributes.append( 
+ AttributeWithProperties( + "string/set", AttributeType.STRING_SET, Mock(values=["a", "b"]) + ) + ) + attributes.append( + AttributeWithProperties( + "git/ref", + AttributeType.GIT_REF, + Mock(commit=Mock(commitId="abcdef0123456789")), + ) + ) + attributes.append(AttributeWithProperties("file", AttributeType.FILE, None)) + attributes.append( + AttributeWithProperties("file/set", AttributeType.FILE_SET, None) + ) + attributes.append( + AttributeWithProperties("image/series", AttributeType.IMAGE_SERIES, None) + ) + return attributes + + @patch.object(NeptuneBackendMock, "search_leaderboard_entries") + def test_get_table_as_pandas(self, search_leaderboard_entries): + # given + now = datetime.now() + attributes = self.build_attributes_leaderboard(now) + + # and + empty_entry = LeaderboardEntry(str(uuid.uuid4()), []) + filled_entry = LeaderboardEntry(str(uuid.uuid4()), attributes) + search_leaderboard_entries.return_value = [empty_entry, filled_entry] + + # when + df = self.get_table().to_pandas() + + # then + self.assertEqual("idle", df["run/state"][1]) + self.assertEqual(12.5, df["float"][1]) + self.assertEqual("some text", df["string"][1]) + self.assertEqual(now, df["datetime"][1]) + self.assertEqual(8.7, df["float/series"][1]) + self.assertEqual("last text", df["string/series"][1]) + self.assertEqual("a,b", df["string/set"][1]) + self.assertEqual("abcdef0123456789", df["git/ref"][1]) + + with self.assertRaises(KeyError): + self.assertTrue(df["file"]) + with self.assertRaises(KeyError): + self.assertTrue(df["file/set"]) + with self.assertRaises(KeyError): + self.assertTrue(df["image/series"]) + + @patch.object(NeptuneBackendMock, "search_leaderboard_entries") + @patch.object(NeptuneBackendMock, "download_file") + @patch.object(NeptuneBackendMock, "download_file_set") + def test_get_table_as_table_entries( + self, + download_file_set, + download_file, + search_leaderboard_entries, + ): + # given + exp_id = str(uuid.uuid4()) + now = datetime.now() + attributes = 
self.build_attributes_leaderboard(now) + + # and + search_leaderboard_entries.return_value = [LeaderboardEntry(exp_id, attributes)] + + # when + table_entry = self.get_table_entries(table=self.get_table())[0] + + # then + self.assertEqual("idle", table_entry["run/state"].get()) + self.assertEqual("idle", table_entry["run"]["state"].get()) + self.assertEqual(12.5, table_entry["float"].get()) + self.assertEqual("some text", table_entry["string"].get()) + self.assertEqual(now, table_entry["datetime"].get()) + self.assertEqual(8.7, table_entry["float/series"].get()) + self.assertEqual("last text", table_entry["string/series"].get()) + self.assertEqual({"a", "b"}, table_entry["string/set"].get()) + self.assertEqual("abcdef0123456789", table_entry["git/ref"].get()) + + with self.assertRaises(MetadataInconsistency): + table_entry["file"].get() + with self.assertRaises(MetadataInconsistency): + table_entry["file/set"].get() + with self.assertRaises(MetadataInconsistency): + table_entry["image/series"].get() + + table_entry["file"].download("some_directory") + download_file.assert_called_with( + container_id=exp_id, + container_type=self.expected_container_type, + path=["file"], + destination="some_directory", + ) + + table_entry["file/set"].download("some_directory") + download_file_set.assert_called_with( + container_id=exp_id, + container_type=self.expected_container_type, + path=["file", "set"], + destination="some_directory", + ) diff --git a/tests/neptune/new/client/test_model.py b/tests/neptune/new/client/test_model.py new file mode 100644 index 000000000..e8fa63d93 --- /dev/null +++ b/tests/neptune/new/client/test_model.py @@ -0,0 +1,105 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# pylint: disable=protected-access +import os +import unittest + +from mock import patch + +from neptune.new import ANONYMOUS, init_model +from neptune.new.attributes import String +from neptune.new.envs import API_TOKEN_ENV_NAME, PROJECT_ENV_NAME +from neptune.new.exceptions import NeptuneWrongInitParametersException, NeptuneException +from neptune.new.internal.backends.api_model import ( + Attribute, + AttributeType, + IntAttribute, +) +from neptune.new.internal.backends.neptune_backend_mock import NeptuneBackendMock +from tests.neptune.new.client.abstract_experiment_test_mixin import ( + AbstractExperimentTestMixin, +) +from tests.neptune.new.utils.api_experiments_factory import api_model + +AN_API_MODEL = api_model() + + +@patch("neptune.new.internal.backends.factory.HostedNeptuneBackend", NeptuneBackendMock) +class TestClientModel(AbstractExperimentTestMixin, unittest.TestCase): + @staticmethod + def call_init(**kwargs): + return init_model(key="MOD", **kwargs) + + @classmethod + def setUpClass(cls) -> None: + os.environ[PROJECT_ENV_NAME] = "organization/project" + os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS + + def test_offline_mode(self): + with self.assertRaises(NeptuneException): + init_model(key="MOD", mode="offline") + + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_metadata_container", + new=lambda _, container_id, expected_container_type: AN_API_MODEL, + ) + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", + new=lambda _, _uuid, _type: 
[Attribute("some/variable", AttributeType.INT)], + ) + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_int_attribute", + new=lambda _, _uuid, _type, _path: IntAttribute(42), + ) + def test_read_only_mode(self): + exp = init_model(mode="read-only", model="whatever") + + with self.assertLogs() as caplog: + exp["some/variable"] = 13 + exp["some/other_variable"] = 11 + self.assertEqual( + caplog.output, + [ + "WARNING:neptune.new.internal.operation_processors.read_only_operation_processor:" + "Client in read-only mode, nothing will be saved to server." + ], + ) + + self.assertEqual(42, exp["some/variable"].fetch()) + self.assertNotIn(str(exp._id), os.listdir(".neptune")) + + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_metadata_container", + new=lambda _, container_id, expected_container_type: AN_API_MODEL, + ) + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", + new=lambda _, _uuid, _type: [Attribute("test", AttributeType.STRING)], + ) + def test_resume(self): + with init_model(flush_period=0.5, model="whatever") as exp: + self.assertEqual(exp._id, AN_API_MODEL.id) + self.assertIsInstance(exp.get_structure()["test"], String) + + def test_wrong_parameters(self): + with self.assertRaises(NeptuneWrongInitParametersException): + init_model(model=None, key=None) + + def test_name_parameter(self): + with init_model(key="TRY", name="some_name") as exp: + exp.wait() + self.assertEqual(exp["sys/name"].fetch(), "some_name") diff --git a/tests/neptune/new/client/test_model_version.py b/tests/neptune/new/client/test_model_version.py new file mode 100644 index 000000000..ded1067fc --- /dev/null +++ b/tests/neptune/new/client/test_model_version.py @@ -0,0 +1,147 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# pylint: disable=protected-access +import os +import unittest + +from mock import patch + +from neptune.new import ANONYMOUS, init_model_version +from neptune.new.attributes import String +from neptune.new.envs import API_TOKEN_ENV_NAME, PROJECT_ENV_NAME +from neptune.new.exceptions import ( + NeptuneOfflineModeChangeStageException, + NeptuneWrongInitParametersException, + NeptuneException, +) +from neptune.new.internal.backends.api_model import ( + Attribute, + AttributeType, + IntAttribute, + StringAttribute, +) +from neptune.new.internal.backends.neptune_backend_mock import NeptuneBackendMock +from neptune.new.internal.container_type import ContainerType +from tests.neptune.new.client.abstract_experiment_test_mixin import ( + AbstractExperimentTestMixin, +) +from tests.neptune.new.utils.api_experiments_factory import api_model, api_model_version + +AN_API_MODEL = api_model() +AN_API_MODEL_VERSION = api_model_version() + + +@patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_metadata_container", + new=lambda _, container_id, expected_container_type: AN_API_MODEL + if expected_container_type == ContainerType.MODEL + else AN_API_MODEL_VERSION, +) +@patch("neptune.new.internal.backends.factory.HostedNeptuneBackend", NeptuneBackendMock) +class TestClientModelVersion(AbstractExperimentTestMixin, unittest.TestCase): + @staticmethod + def call_init(**kwargs): + return init_model_version(model="PRO-MOD", **kwargs) + + @classmethod + def setUpClass(cls) -> None: + os.environ[PROJECT_ENV_NAME] = "organization/project" + 
os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS + + def test_offline_mode(self): + with self.assertRaises(NeptuneException): + init_model_version(model="PRO-MOD", mode="offline") + + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", + new=lambda _, _uuid, _type: [ + Attribute("some/variable", AttributeType.INT), + Attribute("sys/model_id", AttributeType.STRING), + ], + ) + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_int_attribute", + new=lambda _, _uuid, _type, _path: IntAttribute(42), + ) + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_string_attribute", + new=lambda _, _uuid, _type, _path: StringAttribute("MDL"), + ) + def test_read_only_mode(self): + exp = init_model_version(mode="read-only", version="whatever") + + with self.assertLogs() as caplog: + exp["some/variable"] = 13 + exp["some/other_variable"] = 11 + self.assertEqual( + caplog.output, + [ + "WARNING:neptune.new.internal.operation_processors.read_only_operation_processor:" + "Client in read-only mode, nothing will be saved to server." 
+ ], + ) + + self.assertEqual(42, exp["some/variable"].fetch()) + self.assertNotIn(str(exp._id), os.listdir(".neptune")) + + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", + new=lambda _, _uuid, _type: [ + Attribute("test", AttributeType.STRING), + Attribute("sys/model_id", AttributeType.STRING), + ], + ) + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_string_attribute", + new=lambda _, _uuid, _type, _path: StringAttribute("MDL"), + ) + def test_resume(self): + with init_model_version(flush_period=0.5, version="whatever") as exp: + self.assertEqual(exp._id, AN_API_MODEL_VERSION.id) + self.assertIsInstance(exp.get_structure()["test"], String) + + def test_sync_mode(self): + AbstractExperimentTestMixin.test_sync_mode(self) + + def test_async_mode(self): + AbstractExperimentTestMixin.test_async_mode(self) + + def test_wrong_parameters(self): + with self.assertRaises(NeptuneWrongInitParametersException): + init_model_version(version=None, model=None) + + def test_change_stage(self): + exp = self.call_init() + exp.change_stage(stage="production") + + self.assertEqual("production", exp["sys/stage"].fetch()) + + with self.assertRaises(ValueError): + exp.change_stage(stage="wrong_stage") + + def test_change_stage_of_offline_model_version(self): + # this test will be required when we decide that creating model versions + # in offline mode is allowed + with self.assertRaises(NeptuneException): + exp = self.call_init(mode="offline") + with self.assertRaises(NeptuneOfflineModeChangeStageException): + exp.change_stage(stage="production") + + def test_name_parameter(self): + with self.call_init(name="some_name") as exp: + exp.wait() + self.assertEqual(exp["sys/name"].fetch(), "some_name") diff --git a/tests/neptune/new/client/test_model_version_tables.py b/tests/neptune/new/client/test_model_version_tables.py new file mode 100644 index 000000000..f1e1d02db --- /dev/null +++ 
b/tests/neptune/new/client/test_model_version_tables.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import unittest + +from neptune.new import init_model +from neptune.new.internal.container_type import ContainerType +from tests.neptune.new.client.abstract_tables_test import AbstractTablesTestMixin + + +class TestModelVersionTables(AbstractTablesTestMixin, unittest.TestCase): + expected_container_type = ContainerType.MODEL_VERSION + + def get_table(self): + return init_model( + model="PRO-MOD",  # model identifier, as in TestClientModelVersion.call_init + project="organization/project",  # project name, same value the run-table test uses + mode="read-only", + ).fetch_model_versions_table() + + def get_table_entries(self, table): + return table.to_rows() diff --git a/tests/neptune/new/client/test_project.py b/tests/neptune/new/client/test_project.py new file mode 100644 index 000000000..4e93a4075 --- /dev/null +++ b/tests/neptune/new/client/test_project.py @@ -0,0 +1,99 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# pylint: disable=protected-access +import os +import unittest + +from mock import patch + +from neptune.new import ANONYMOUS, init_project +from neptune.new.envs import API_TOKEN_ENV_NAME, PROJECT_ENV_NAME +from neptune.new.exceptions import ( + NeptuneException, + NeptuneMissingProjectNameException, +) +from neptune.new.internal.backends.api_model import ( + Attribute, + AttributeType, + IntAttribute, +) +from neptune.new.internal.backends.neptune_backend_mock import NeptuneBackendMock +from tests.neptune.new.client.abstract_experiment_test_mixin import ( + AbstractExperimentTestMixin, +) + + +@patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", + new=lambda _, _uuid, _type: [Attribute("test", AttributeType.STRING)], +) +@patch("neptune.new.internal.backends.factory.HostedNeptuneBackend", NeptuneBackendMock) +class TestClientProject(AbstractExperimentTestMixin, unittest.TestCase): + PROJECT_NAME = "organization/project" + + @staticmethod + def call_init(**kwargs): + return init_project(name=TestClientProject.PROJECT_NAME, **kwargs) + + @classmethod + def setUpClass(cls) -> None: + os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS + + @classmethod + def setUp(cls) -> None: + if PROJECT_ENV_NAME in os.environ: + del os.environ[PROJECT_ENV_NAME] + + def test_offline_mode(self): + with self.assertRaises(NeptuneException): + init_project(name=self.PROJECT_NAME, mode="offline") + + def test_no_project_name(self): + with self.assertRaises(NeptuneMissingProjectNameException): + init_project(mode="async") + + def test_inexistent_project(self): + with self.assertRaises(NeptuneMissingProjectNameException): + init_project(mode="async") + + def test_project_name_env_var(self): + os.environ[PROJECT_ENV_NAME] = self.PROJECT_NAME + + project = init_project(mode="sync") + project["some/variable"] = 13 + self.assertEqual(13, 
project["some/variable"].fetch()) + + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_int_attribute", + new=lambda _, _uuid, _type, _path: IntAttribute(42), + ) + def test_read_only_mode(self): + project = init_project(name=self.PROJECT_NAME, mode="read-only") + + with self.assertLogs() as caplog: + project["some/variable"] = 13 + project["some/other_variable"] = 11 + self.assertEqual( + caplog.output, + [ + "WARNING:neptune.new.internal.operation_processors.read_only_operation_processor:" + "Client in read-only mode, nothing will be saved to server." + ], + ) + + self.assertEqual(42, project["some/variable"].fetch()) + self.assertNotIn(str(project._id), os.listdir(".neptune")) diff --git a/tests/neptune/new/client/test_run.py b/tests/neptune/new/client/test_run.py new file mode 100644 index 000000000..ed60c1fce --- /dev/null +++ b/tests/neptune/new/client/test_run.py @@ -0,0 +1,177 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pylint: disable=protected-access +import os +import unittest + +from mock import patch + +from neptune.new import ANONYMOUS, Run, get_last_run, init_run +from neptune.new.attributes.atoms import String +from neptune.new.envs import API_TOKEN_ENV_NAME, PROJECT_ENV_NAME +from neptune.new.exceptions import MissingFieldException, NeptuneUninitializedException +from neptune.new.internal.backends.api_model import ( + Attribute, + AttributeType, + IntAttribute, +) +from neptune.new.internal.backends.neptune_backend_mock import NeptuneBackendMock +from neptune.utils import IS_WINDOWS +from tests.neptune.new.client.abstract_experiment_test_mixin import ( + AbstractExperimentTestMixin, +) +from tests.neptune.new.utils.api_experiments_factory import api_run + +AN_API_RUN = api_run() + + +@patch("neptune.new.internal.backends.factory.HostedNeptuneBackend", NeptuneBackendMock) +class TestClientRun(AbstractExperimentTestMixin, unittest.TestCase): + @staticmethod + def call_init(**kwargs): + return init_run(**kwargs) + + @classmethod + def setUpClass(cls) -> None: + os.environ[PROJECT_ENV_NAME] = "organization/project" + os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS + + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_metadata_container", + new=lambda _, container_id, expected_container_type: AN_API_RUN, + ) + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", + new=lambda _, _uuid, _type: [Attribute("some/variable", AttributeType.INT)], + ) + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_int_attribute", + new=lambda _, _uuid, _type, _path: IntAttribute(42), + ) + def test_read_only_mode(self): + exp = init_run(mode="read-only", run="whatever") + + with self.assertLogs() as caplog: + exp["some/variable"] = 13 + exp["some/other_variable"] = 11 + self.assertEqual( + caplog.output, + [ + 
"WARNING:neptune.new.internal.operation_processors.read_only_operation_processor:" + "Client in read-only mode, nothing will be saved to server." + ], + ) + + self.assertEqual(42, exp["some/variable"].fetch()) + self.assertNotIn(str(exp._id), os.listdir(".neptune")) + + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_metadata_container", + new=lambda _, container_id, expected_container_type: AN_API_RUN, + ) + @patch( + "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", + new=lambda _, _uuid, _type: [Attribute("test", AttributeType.STRING)], + ) + def test_resume(self): + with init_run(flush_period=0.5, run="whatever") as exp: + self.assertEqual(exp._id, AN_API_RUN.id) + self.assertIsInstance(exp.get_structure()["test"], String) + + @patch("neptune.new.internal.utils.source_code.sys.argv", ["main.py"]) + @patch("neptune.new.internal.init.run.os.path.isfile", new=lambda file: "." in file) + @patch( + "neptune.new.internal.utils.glob", + new=lambda path, recursive=False: [path.replace("*", "file.txt")], + ) + @patch( + "neptune.new.internal.utils.os.path.abspath", + new=lambda path: os.path.normpath("/home/user/main_dir/" + path), + ) + @patch("neptune.new.internal.utils.os.getcwd", new=lambda: "/home/user/main_dir") + @unittest.skipIf(IS_WINDOWS, "Linux/Mac test") + def test_entrypoint(self): + exp = init_run(mode="debug") + self.assertEqual(exp["source_code/entrypoint"].fetch(), "main.py") + + exp = init_run(mode="debug", source_files=[]) + self.assertEqual(exp["source_code/entrypoint"].fetch(), "main.py") + + exp = init_run(mode="debug", source_files=["../*"]) + self.assertEqual(exp["source_code/entrypoint"].fetch(), "main_dir/main.py") + + exp = init_run(mode="debug", source_files=["internal/*"]) + self.assertEqual(exp["source_code/entrypoint"].fetch(), "main.py") + + exp = init_run(mode="debug", source_files=["../other_dir/*"]) + self.assertEqual(exp["source_code/entrypoint"].fetch(), 
"../main_dir/main.py") + + @patch("neptune.new.internal.utils.source_code.sys.argv", ["main.py"]) + @patch("neptune.new.internal.utils.source_code.is_ipython", new=lambda: True) + def test_entrypoint_in_interactive_python(self): + exp = init_run(mode="debug") + with self.assertRaises(MissingFieldException): + exp["source_code/entrypoint"].fetch() + + exp = init_run(mode="debug", source_files=[]) + with self.assertRaises(MissingFieldException): + exp["source_code/entrypoint"].fetch() + + exp = init_run(mode="debug", source_files=["../*"]) + with self.assertRaises(MissingFieldException): + exp["source_code/entrypoint"].fetch() + + exp = init_run(mode="debug", source_files=["internal/*"]) + with self.assertRaises(MissingFieldException): + exp["source_code/entrypoint"].fetch() + + @patch("neptune.new.internal.utils.source_code.sys.argv", ["main.py"]) + @patch("neptune.new.internal.utils.source_code.get_common_root", new=lambda _: None) + @patch("neptune.new.internal.init.run.os.path.isfile", new=lambda file: "." 
in file) + @patch( + "neptune.new.internal.utils.glob", + new=lambda path, recursive=False: [path.replace("*", "file.txt")], + ) + @patch( + "neptune.new.internal.utils.os.path.abspath", + new=lambda path: os.path.normpath("/home/user/main_dir/" + path), + ) + def test_entrypoint_without_common_root(self): + exp = init_run(mode="debug", source_files=["../*"]) + self.assertEqual( + exp["source_code/entrypoint"].fetch(), "/home/user/main_dir/main.py" + ) + + exp = init_run(mode="debug", source_files=["internal/*"]) + self.assertEqual( + exp["source_code/entrypoint"].fetch(), "/home/user/main_dir/main.py" + ) + + def test_last_exp_is_raising_exception_when_non_initialized(self): + # given uninitialized run + Run.last_run = None + + # expect: raises NeptuneUninitializedException + with self.assertRaises(NeptuneUninitializedException): + get_last_run() + + def test_last_exp_is_the_latest_initialized(self): + # given two initialized runs + with init_run() as exp1, init_run() as exp2: + # expect: `neptune.latest_run` to be the latest initialized one + self.assertIsNot(exp1, get_last_run()) + self.assertIs(exp2, get_last_run()) diff --git a/tests/neptune/new/client/test_run_tables.py b/tests/neptune/new/client/test_run_tables.py new file mode 100644 index 000000000..f3aa4c45b --- /dev/null +++ b/tests/neptune/new/client/test_run_tables.py @@ -0,0 +1,31 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import unittest + +from neptune.new import get_project +from neptune.new.internal.container_type import ContainerType +from tests.neptune.new.client.abstract_tables_test import AbstractTablesTestMixin + + +class TestRunTables(AbstractTablesTestMixin, unittest.TestCase): + expected_container_type = ContainerType.RUN + + def get_table(self): + return get_project("organization/project").fetch_runs_table() + + def get_table_entries(self, table): + return table.to_rows() diff --git a/tests/neptune/new/internal/backends/test_hosted_file_operations.py b/tests/neptune/new/internal/backends/test_hosted_file_operations.py index 1381e6d20..2dc4783b3 100644 --- a/tests/neptune/new/internal/backends/test_hosted_file_operations.py +++ b/tests/neptune/new/internal/backends/test_hosted_file_operations.py @@ -34,7 +34,7 @@ ) from neptune.utils import IS_WINDOWS from tests.neptune.new.backend_test_mixin import BackendTestMixin -from tests.neptune.new.helpers import create_file +from tests.neptune.new.utils.file_helpers import create_file def set_expected_result(endpoint: MagicMock, value: dict): diff --git a/tests/neptune/new/internal/backends/test_neptune_backend_mock.py b/tests/neptune/new/internal/backends/test_neptune_backend_mock.py index 5aa5b7c5a..9813157ea 100644 --- a/tests/neptune/new/internal/backends/test_neptune_backend_mock.py +++ b/tests/neptune/new/internal/backends/test_neptune_backend_mock.py @@ -19,11 +19,7 @@ from random import randint from time import time -from neptune.new.exceptions import ( - MetadataInconsistency, - ProjectUUIDNotFound, - RunUUIDNotFound, -) +from neptune.new.exceptions import MetadataInconsistency, ContainerUUIDNotFound from neptune.new.internal.backends.api_model import ( DatetimeAttribute, FloatAttribute, @@ -54,10 +50,20 @@ class TestNeptuneBackendMock(unittest.TestCase): def setUp(self) -> None: self.backend = NeptuneBackendMock() - self.exp = self.backend.create_run(self.backend._project_id) + project_id = 
self.backend._project_id + exp = self.backend.create_run(project_id=project_id) + model = self.backend.create_model( + project_id=project_id, + key="MOD", + ) + model_version = self.backend.create_model_version( + project_id=project_id, model_id=model.id + ) self.ids_with_types = [ - (self.exp.id, ContainerType.RUN), (self.backend._project_id, ContainerType.PROJECT), + (exp.id, ContainerType.RUN), + (model.id, ContainerType.MODEL), + (model_version.id, ContainerType.MODEL_VERSION), ] def test_get_float_attribute(self): @@ -386,24 +392,14 @@ def test_get_string_set_attribute_wrong_type(self): def test_container_not_found(self): # given - ids_with_types_and_exceptions = [ - (container_id, container_type, exception_type) - for (container_id, container_type), exception_type in zip( - self.ids_with_types, [RunUUIDNotFound, ProjectUUIDNotFound] - ) - ] - for ( - container_id, - container_type, - exception_type, - ) in ids_with_types_and_exceptions: + for (container_id, container_type) in self.ids_with_types: with self.subTest(f"For containerType: {container_type}"): self.backend.execute_operations( container_id, container_type, [AssignString(["x"], "abc")] ) # then - with self.assertRaises(exception_type): + with self.assertRaises(ContainerUUIDNotFound): self.backend.get_float_series_attribute( str(uuid.uuid4()), container_type, ["x"] ) diff --git a/tests/neptune/new/internal/backends/test_nql.py b/tests/neptune/new/internal/backends/test_nql.py new file mode 100644 index 000000000..ee7d420c3 --- /dev/null +++ b/tests/neptune/new/internal/backends/test_nql.py @@ -0,0 +1,117 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import unittest + +from neptune.new.internal.backends.nql import ( + NQLAggregator, + NQLAttributeOperator, + NQLAttributeType, + NQLQueryAttribute, + NQLQueryAggregate, +) + + +class TestNQL(unittest.TestCase): + def test_attributes(self): + self.assertEqual( + str( + NQLQueryAttribute( + name="sys/owner", + type=NQLAttributeType.STRING, + operator=NQLAttributeOperator.EQUALS, + value="user1", + ) + ), + '(`sys/owner`:string = "user1")', + ) + self.assertEqual( + str( + NQLQueryAttribute( + name="sys/tags", + type=NQLAttributeType.STRING_SET, + operator=NQLAttributeOperator.CONTAINS, + value="tag1", + ) + ), + '(`sys/tags`:stringSet CONTAINS "tag1")', + ) + self.assertEqual( + str( + NQLQueryAttribute( + name="sys/state", + type=NQLAttributeType.EXPERIMENT_STATE, + operator=NQLAttributeOperator.EQUALS, + value="running", + ) + ), + '(`sys/state`:experimentState = "running")', + ) + + def test_multiple_attribute_values(self): + self.assertEqual( + str( + NQLQueryAggregate( + items=[ + NQLQueryAttribute( + name="sys/owner", + type=NQLAttributeType.STRING, + operator=NQLAttributeOperator.EQUALS, + value=user, + ) + for user in ["user1", "user2"] + ], + aggregator=NQLAggregator.OR, + ) + ), + '((`sys/owner`:string = "user1") OR (`sys/owner`:string = "user2"))', + ) + + def test_multiple_queries(self): + self.assertEqual( + str( + NQLQueryAggregate( + items=[ + NQLQueryAggregate( + items=[ + NQLQueryAttribute( + name="sys/owner", + type=NQLAttributeType.STRING, + operator=NQLAttributeOperator.EQUALS, + value=user, + ) + for user in ["user1", "user2"] + ], + 
aggregator=NQLAggregator.OR, + ), + NQLQueryAggregate( + items=[ + NQLQueryAttribute( + name="sys/tags", + type=NQLAttributeType.STRING_SET, + operator=NQLAttributeOperator.CONTAINS, + value=tag, + ) + for tag in ["tag1", "tag2"] + ], + aggregator=NQLAggregator.OR, + ), + ], + aggregator=NQLAggregator.AND, + ) + ), + '(((`sys/owner`:string = "user1") OR (`sys/owner`:string = "user2")) AND ' + '((`sys/tags`:stringSet CONTAINS "tag1") OR (`sys/tags`:stringSet CONTAINS "tag2")))', + ) diff --git a/tests/neptune/new/internal/test_container_structure.py b/tests/neptune/new/internal/test_container_structure.py index d609838db..4ec9eca78 100644 --- a/tests/neptune/new/internal/test_container_structure.py +++ b/tests/neptune/new/internal/test_container_structure.py @@ -19,7 +19,7 @@ from neptune.new.exceptions import MetadataInconsistency from neptune.new.internal.backends.neptune_backend_mock import NeptuneBackendMock from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.run_structure import ContainerStructure +from neptune.new.internal.container_structure import ContainerStructure from neptune.new.types.value import Value diff --git a/tests/neptune/new/internal/containers/test_disk_queue.py b/tests/neptune/new/internal/test_disk_queue.py similarity index 94% rename from tests/neptune/new/internal/containers/test_disk_queue.py rename to tests/neptune/new/internal/test_disk_queue.py index 49ed191f2..3a4ab5b0f 100644 --- a/tests/neptune/new/internal/containers/test_disk_queue.py +++ b/tests/neptune/new/internal/test_disk_queue.py @@ -21,8 +21,8 @@ from pathlib import Path from tempfile import TemporaryDirectory -from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.containers.disk_queue import DiskQueue +from neptune.new.internal.disk_queue import DiskQueue + # pylint: disable=protected-access @@ -47,7 +47,6 @@ def test_put(self): self._serializer, self._deserializer, threading.RLock(), - 
ContainerType.RUN, ) obj = TestDiskQueue.Obj(5, "test") queue.put(obj) @@ -62,7 +61,6 @@ def test_multiple_files(self): self._serializer, self._deserializer, threading.RLock(), - ContainerType.RUN, max_file_size=300, ) for i in range(1, 101): @@ -83,7 +81,6 @@ def test_get_batch(self): self._serializer, self._deserializer, threading.RLock(), - ContainerType.RUN, max_file_size=100, ) for i in range(1, 91): @@ -115,7 +112,6 @@ def test_resuming_queue(self): self._serializer, self._deserializer, threading.RLock(), - ContainerType.RUN, max_file_size=999, ) for i in range(1, 501): @@ -143,7 +139,6 @@ def test_resuming_queue(self): self._serializer, self._deserializer, threading.RLock(), - ContainerType.RUN, max_file_size=200, ) for i in range(version_to_ack + 1, 501): diff --git a/tests/neptune/new/internal/utils/test_json_file_splitter.py b/tests/neptune/new/internal/utils/test_json_file_splitter.py index 3ba1b2b95..97eccc1ab 100644 --- a/tests/neptune/new/internal/utils/test_json_file_splitter.py +++ b/tests/neptune/new/internal/utils/test_json_file_splitter.py @@ -16,7 +16,7 @@ import unittest from neptune.new.internal.utils.json_file_splitter import JsonFileSplitter -from tests.neptune.new.helpers import create_file +from tests.neptune.new.utils.file_helpers import create_file class TestJsonFileSplitter(unittest.TestCase): diff --git a/neptune/new/internal/containers/__init__.py b/tests/neptune/new/sync/__init__.py similarity index 92% rename from neptune/new/internal/containers/__init__.py rename to tests/neptune/new/sync/__init__.py index 63b30720b..b5e585d90 100644 --- a/neptune/new/internal/containers/__init__.py +++ b/tests/neptune/new/sync/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# Copyright (c) 2022, Neptune Labs Sp. z o.o. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/neptune/new/sync/test_status.py b/tests/neptune/new/sync/test_status.py new file mode 100644 index 000000000..195e0feae --- /dev/null +++ b/tests/neptune/new/sync/test_status.py @@ -0,0 +1,107 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from unittest.mock import MagicMock + +import pytest + +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.operation import Operation +from neptune.new.sync import StatusRunner +from neptune.new.sync.utils import get_qualified_name +from tests.neptune.new.sync.utils import ( + prepare_metadata_container, + generate_get_metadata_container, +) + + +@pytest.fixture(name="backend") +def backend_fixture(): + return MagicMock() + + +@pytest.fixture(name="status_runner") +def status_runner_fixture(backend): + return StatusRunner(backend=backend) + + +@pytest.mark.parametrize("container_type", list(ContainerType)) +def test_list_containers( + tmp_path, mocker, capsys, backend, status_runner, container_type +): + # given + unsynced_container = prepare_metadata_container( + container_type=container_type, path=tmp_path, last_ack_version=1 + ) + synced_container = prepare_metadata_container( + container_type=container_type, path=tmp_path, last_ack_version=3 + ) + get_container_impl = generate_get_metadata_container( + registered_containers=(unsynced_container, synced_container) + ) + + # and + mocker.patch.object(backend, 
"get_metadata_container", get_container_impl) + mocker.patch.object(Operation, "from_dict") + + # when + status_runner.synchronization_status(tmp_path) + + # then + captured = capsys.readouterr() + assert captured.err == "" + assert ( + "Synchronized objects:\n- {}".format(get_qualified_name(synced_container)) + in captured.out + ) + assert ( + "Unsynchronized objects:\n- {}".format(get_qualified_name(unsynced_container)) + in captured.out + ) + + +def test_list_offline_runs(tmp_path, mocker, capsys, status_runner): + # given + offline_run = prepare_metadata_container( + container_type=ContainerType.RUN, + path=tmp_path, + last_ack_version=None, + ) + + # and + mocker.patch.object(Operation, "from_dict") + + # when + status_runner.synchronization_status(tmp_path) + + # then + captured = capsys.readouterr() + assert captured.err == "" + assert ( + "Unsynchronized offline objects:\n- offline/run__{}".format(offline_run.id) + in captured.out + ) + + +def test_list_runs_when_no_run(tmp_path, capsys, status_runner): + (tmp_path / "async").mkdir() + # when + with pytest.raises(SystemExit): + status_runner.synchronization_status(tmp_path) + + # then + captured = capsys.readouterr() + assert captured.err == "" + assert "There are no Neptune objects" in captured.out diff --git a/tests/neptune/new/sync/test_sync.py b/tests/neptune/new/sync/test_sync.py new file mode 100644 index 000000000..555fffcd8 --- /dev/null +++ b/tests/neptune/new/sync/test_sync.py @@ -0,0 +1,299 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from unittest.mock import MagicMock + +import pytest + +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.operation import Operation +from neptune.new.sync import SyncRunner +from neptune.new.sync.utils import get_qualified_name +from tests.neptune.new.sync.utils import ( + generate_get_metadata_container, + prepare_metadata_container, + execute_operations, + prepare_deprecated_run, +) + + +@pytest.fixture(name="backend") +def backend_fixture(): + backend = MagicMock() + backend.execute_operations.side_effect = execute_operations + return backend + + +@pytest.fixture(name="sync_runner") +def sync_runner_fixture(backend): + return SyncRunner(backend=backend) + + +@pytest.mark.parametrize("container_type", list(ContainerType)) +def test_sync_all_runs(tmp_path, mocker, capsys, backend, sync_runner, container_type): + # given + unsynced_container = prepare_metadata_container( + container_type=container_type, path=tmp_path, last_ack_version=1 + ) + synced_container = prepare_metadata_container( + container_type=container_type, path=tmp_path, last_ack_version=3 + ) + get_container_impl = generate_get_metadata_container( + registered_containers=(unsynced_container, synced_container) + ) + + # and + mocker.patch.object(backend, "get_metadata_container", get_container_impl) + mocker.patch.object(Operation, "from_dict", lambda x: x) + + # when + sync_runner.sync_all_containers(tmp_path, "foo") + + # then + captured = capsys.readouterr() + assert captured.err == "" + + # expect output for unsynced run + assert f"Synchronising {get_qualified_name(unsynced_container)}" in captured.out + assert ( + f"Synchronization of {container_type.value} {get_qualified_name(unsynced_container)} completed." 
+ in captured.out + ) + + # expect NO output for synced run + assert f"Synchronising {get_qualified_name(synced_container)}" not in captured.out + + # and + backend.execute_operations.has_calls( + [ + mocker.call(unsynced_container.id, ContainerType.RUN, ["op-1", "op-2"]), + ], + any_order=True, + ) + + +def test_sync_all_offline_runs(tmp_path, mocker, capsys, backend, sync_runner): + # given + offline_run = prepare_metadata_container( + container_type=ContainerType.RUN, path=tmp_path, last_ack_version=None + ) + get_run_impl = generate_get_metadata_container(registered_containers=(offline_run,)) + + # and + mocker.patch.object(backend, "get_metadata_container", get_run_impl) + mocker.patch.object( + sync_runner, + "_register_offline_run", + lambda project, container_type: offline_run, + ) + mocker.patch.object(Operation, "from_dict", lambda x: x) + + # when + sync_runner.sync_all_containers(tmp_path, "foo") + + # then + captured = capsys.readouterr() + assert captured.err == "" + assert ( + "Offline run run__{} registered as {}".format( + f"{offline_run.id}", get_qualified_name(offline_run) + ) + ) in captured.out + + # and + backend.execute_operations.has_calls( + [ + mocker.call(offline_run.id, ContainerType.RUN, ["op-1", "op-2"]), + ], + any_order=True, + ) + + +def test_sync_selected_runs(tmp_path, mocker, capsys, backend, sync_runner): + # given + unsync_exp = prepare_metadata_container( + container_type=ContainerType.RUN, path=tmp_path, last_ack_version=1 + ) # won't be synced, despite fact it's not synced yet + sync_exp = prepare_metadata_container( + container_type=ContainerType.RUN, path=tmp_path, last_ack_version=3 + ) # will be synced despite fact that it's up to date + offline_run = prepare_metadata_container( + container_type=ContainerType.RUN, path=tmp_path, last_ack_version=None + ) # will be synced + get_run_impl = generate_get_metadata_container( + registered_containers=[ + unsync_exp, + sync_exp, + offline_run, + ] + ) + + # and + 
mocker.patch.object(backend, "get_metadata_container", get_run_impl) + mocker.patch.object( + sync_runner, + "_register_offline_run", + lambda project, container_type: offline_run, + ) + mocker.patch.object(Operation, "from_dict", lambda x: x) + + # when + sync_runner.sync_selected_containers( + base_path=tmp_path, + project_name="some-name", + container_names=[ + get_qualified_name(sync_exp), + "offline/run__" + offline_run.id, + ], + ) + + # then + captured = capsys.readouterr() + # expect no errors + assert captured.err == "" + + # expected output for mentioned async exp + assert "Synchronising {}".format(get_qualified_name(sync_exp)) in captured.out + assert ( + "Synchronization of run {} completed.".format(get_qualified_name(sync_exp)) + in captured.out + ) + + # expected output for offline container + assert ( + "Offline run run__{} registered as {}".format( + f"{offline_run.id}", get_qualified_name(offline_run) + ) + ) in captured.out + assert "Synchronising {}".format(get_qualified_name(offline_run)) in captured.out + assert ( + "Synchronization of run {} completed.".format(get_qualified_name(offline_run)) + in captured.out + ) + + # expected NO output for not mentioned async container + assert "Synchronising {}".format(get_qualified_name(unsync_exp)) not in captured.out + + # and + backend.execute_operations.has_calls( + [ + mocker.call( + sync_exp.id, + ContainerType.RUN, + operations=["op-1", "op-2"], + ), + mocker.call( + offline_run.id, + ContainerType.RUN, + operations=["op-0", "op-1", "op-2"], + ), + ], + any_order=True, + ) + + +def test_sync_deprecated_runs(tmp_path, mocker, capsys, backend, sync_runner): + # given + deprecated_unsynced_run = prepare_deprecated_run(path=tmp_path, last_ack_version=1) + offline_old_run = prepare_deprecated_run(path=tmp_path, last_ack_version=None) + get_container_impl = generate_get_metadata_container( + registered_containers=(deprecated_unsynced_run, offline_old_run) + ) + + # and + mocker.patch.object(backend, 
"get_metadata_container", get_container_impl) + mocker.patch.object( + sync_runner, + "_register_offline_run", + lambda project, container_type: offline_old_run, + ) + mocker.patch.object(Operation, "from_dict", lambda x: x) + + # when + sync_runner.sync_all_containers(tmp_path, "foo") + + # then + captured = capsys.readouterr() + assert captured.err == "" + + assert ( + "Offline run {} registered as {}".format( + f"{offline_old_run.id}", get_qualified_name(offline_old_run) + ) + ) in captured.out + + assert ( + "Synchronising {}".format(get_qualified_name(deprecated_unsynced_run)) + in captured.out + ) + assert ( + "Synchronization of run {} completed.".format( + get_qualified_name(deprecated_unsynced_run) + ) + in captured.out + ) + assert ( + "Synchronising {}".format(get_qualified_name(offline_old_run)) in captured.out + ) + assert ( + "Synchronization of run {} completed.".format( + get_qualified_name(offline_old_run) + ) + in captured.out + ) + + # and + backend.execute_operations.has_calls( + [ + mocker.call( + deprecated_unsynced_run.id, + ContainerType.RUN, + operations=["op-1", "op-2"], + ), + mocker.call( + offline_old_run.id, + ContainerType.RUN, + operations=["op-0", "op-1", "op-2"], + ), + ], + any_order=True, + ) + + +def test_sync_non_existent_container(tmp_path, capsys, sync_runner): + # when + sync_runner.sync_selected_containers( + base_path=tmp_path, project_name="foo", container_names=["bar"] + ) + + # then + captured = capsys.readouterr() + assert "Warning: Run 'bar' does not exist in location" in captured.err + + +def test_sync_non_existent_offline_containers(tmp_path, capsys, sync_runner): + # when + sync_runner.sync_selected_containers( + base_path=tmp_path, project_name="foo", container_names=["offline/foo__bar"] + ) + sync_runner.sync_selected_containers( + base_path=tmp_path, project_name="foo", container_names=["offline/model__bar"] + ) + + # then + captured = capsys.readouterr() + assert "Offline run foo__bar not found on disk." 
in captured.err + assert "Offline run model__bar not found on disk." in captured.err diff --git a/tests/neptune/new/sync/test_utils.py b/tests/neptune/new/sync/test_utils.py new file mode 100644 index 000000000..c6638ed21 --- /dev/null +++ b/tests/neptune/new/sync/test_utils.py @@ -0,0 +1,45 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +from unittest.mock import MagicMock + +import pytest + +from neptune.new.exceptions import ProjectNotFound +from neptune.new.sync.utils import get_project + + +@pytest.fixture(name="backend") +def backend_fixture(): + return MagicMock() + + +def test_get_project_no_name_set(mocker, backend): + # given + mocker.patch.object(os, "getenv") + os.getenv.return_value = None + + # expect + assert get_project(None, backend=backend) is None + + +def test_get_project_project_not_found(backend): + # given + backend.get_project.side_effect = ProjectNotFound("foo") + + # expect + assert get_project("foo", backend=backend) is None diff --git a/tests/neptune/new/sync/utils.py b/tests/neptune/new/sync/utils.py new file mode 100644 index 000000000..c2e0fe2b4 --- /dev/null +++ b/tests/neptune/new/sync/utils.py @@ -0,0 +1,110 @@ +# +# Copyright (c) 2022, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import threading +from pathlib import Path +from typing import Optional + +from neptune.new.constants import OFFLINE_DIRECTORY, ASYNC_DIRECTORY +from neptune.new.exceptions import MetadataContainerNotFound +from neptune.new.internal.backends.api_model import ApiExperiment +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.disk_queue import DiskQueue +from neptune.new.internal.utils.sync_offset_file import SyncOffsetFile +from neptune.new.sync.utils import get_qualified_name +from tests.neptune.new.utils.api_experiments_factory import ( + api_run, + api_metadata_container, +) + + +def generate_get_metadata_container(registered_containers): + def get_metadata_container(container_id, expected_container_type: ContainerType): + """This function will return run as well as projects. 
Will be cleaned in ModelRegistry""" + for exp in registered_containers: + if container_id in (str(exp.id), get_qualified_name(exp)): + return exp + + raise MetadataContainerNotFound.of_container_type( + container_type=expected_container_type, container_id=container_id + ) + + return get_metadata_container + + +# pylint: disable=unused-argument +def execute_operations(container_id, container_type, operations): + return len(operations), [] + + +def _prepare_disk_queue(*, exp_path, last_ack_version): + exp_path.mkdir(parents=True) + queue = DiskQueue( + dir_path=exp_path, + to_dict=lambda x: x, + from_dict=lambda x: x, + lock=threading.RLock(), + ) + queue.put("op-0") + queue.put("op-1") + queue.put("op-2") + + SyncOffsetFile(exp_path / "last_put_version").write(3) + if last_ack_version is not None: + SyncOffsetFile(exp_path / "last_ack_version").write(last_ack_version) + + +def prepare_metadata_container( + *, container_type: ContainerType, path: Path, last_ack_version: Optional[int] +) -> ApiExperiment: + is_offline = last_ack_version is None + + container = api_metadata_container(container_type) + + if is_offline: + exp_path = path / OFFLINE_DIRECTORY / f"{container.type.value}__{container.id}" + else: + execution_id = "exec-0" + exp_path = ( + path + / ASYNC_DIRECTORY + / f"{container.type.value}__{container.id}" + / execution_id + ) + + _prepare_disk_queue( + exp_path=exp_path, + last_ack_version=last_ack_version, + ) + + return container + + +def prepare_deprecated_run(*, path: Path, last_ack_version: Optional[int]): + is_offline = last_ack_version is None + + run = api_run() + + if is_offline: + exp_path = path / OFFLINE_DIRECTORY / run.id + else: + execution_id = "exec-0" + exp_path = path / ASYNC_DIRECTORY / run.id / execution_id + _prepare_disk_queue( + exp_path=exp_path, + last_ack_version=last_ack_version, + ) + return run diff --git a/tests/neptune/new/test_client.py b/tests/neptune/new/test_client.py deleted file mode 100644 index dcefc722f..000000000 
--- a/tests/neptune/new/test_client.py +++ /dev/null @@ -1,479 +0,0 @@ -# -# Copyright (c) 2020, Neptune Labs Sp. z o.o. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# pylint: disable=protected-access -import os -import unittest -import uuid -from datetime import datetime - -from mock import Mock, patch - -from neptune.new import ANONYMOUS, Run, get_last_run, get_project, init, init_project -from neptune.new.attributes.atoms import String -from neptune.new.envs import API_TOKEN_ENV_NAME, PROJECT_ENV_NAME -from neptune.new.exceptions import ( - MetadataInconsistency, - NeptuneException, - NeptuneOfflineModeFetchException, - NeptuneUninitializedException, - NeptuneMissingProjectNameException, -) -from neptune.new.internal.backends.api_model import ( - ApiRun, - Attribute, - AttributeType, - AttributeWithProperties, - LeaderboardEntry, - IntAttribute, -) -from neptune.new.internal.backends.neptune_backend_mock import NeptuneBackendMock -from neptune.new.internal.container_type import ContainerType -from neptune.utils import IS_WINDOWS - - -@patch("neptune.new.internal.backends.factory.HostedNeptuneBackend", NeptuneBackendMock) -class TestClientRun(unittest.TestCase): - @classmethod - def setUpClass(cls) -> None: - os.environ[PROJECT_ENV_NAME] = "organization/project" - os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS - - def test_incorrect_mode(self): - with self.assertRaises(ValueError): - init(mode="srtgj") - - def test_debug_mode(self): - exp = init(mode="debug") - 
exp["some/variable"] = 13 - self.assertEqual(13, exp["some/variable"].fetch()) - self.assertNotIn(str(exp._id), os.listdir(".neptune")) - - def test_offline_mode(self): - exp = init(mode="offline") - exp["some/variable"] = 13 - with self.assertRaises(NeptuneOfflineModeFetchException): - exp["some/variable"].fetch() - self.assertIn(str(exp._id), os.listdir(".neptune/offline")) - self.assertIn("data-1.log", os.listdir(".neptune/offline/{}".format(exp._id))) - - def test_sync_mode(self): - exp = init(mode="sync") - exp["some/variable"] = 13 - exp["copied/variable"] = exp["some/variable"] - self.assertEqual(13, exp["some/variable"].fetch()) - self.assertEqual(13, exp["copied/variable"].fetch()) - self.assertNotIn(str(exp._id), os.listdir(".neptune")) - - def test_async_mode(self): - with init(mode="async", flush_period=0.5) as exp: - exp["some/variable"] = 13 - exp["copied/variable"] = exp["some/variable"] - with self.assertRaises(MetadataInconsistency): - exp["some/variable"].fetch() - exp.wait() - self.assertEqual(13, exp["some/variable"].fetch()) - self.assertEqual(13, exp["copied/variable"].fetch()) - self.assertIn(str(exp._id), os.listdir(".neptune/async")) - execution_dir = os.listdir(".neptune/async/{}".format(exp._id))[0] - self.assertIn( - "data-1.log", - os.listdir(".neptune/async/{}/{}".format(exp._id, execution_dir)), - ) - - @patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_run", - new=lambda _, _id: ApiRun( - "12345678-1234-5678-1234-567812345678", - "SAN-94", - "workspace", - "sandbox", - False, - ), - ) - @patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", - new=lambda _, _uuid, _type: [Attribute("some/variable", AttributeType.INT)], - ) - @patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_int_attribute", - new=lambda _, _uuid, _type, _path: IntAttribute(42), - ) - def test_read_only_mode(self): - exp = init(mode="read-only", run="SAN-94") 
- - with self.assertLogs() as caplog: - exp["some/variable"] = 13 - exp["some/other_variable"] = 11 - self.assertEqual( - caplog.output, - [ - "WARNING:neptune.new.internal.operation_processors.read_only_operation_processor:" - "Client in read-only mode, nothing will be saved to server." - ], - ) - - self.assertEqual(42, exp["some/variable"].fetch()) - self.assertNotIn(str(exp._id), os.listdir(".neptune")) - - @patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_run", - new=lambda _, _id: ApiRun( - "12345678-1234-5678-1234-567812345678", - "SAN-94", - "workspace", - "sandbox", - False, - ), - ) - @patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", - new=lambda _, _uuid, _type: [Attribute("test", AttributeType.STRING)], - ) - def test_resume(self): - with init(flush_period=0.5, run="SAN-94") as exp: - self.assertEqual(exp._id, "12345678-1234-5678-1234-567812345678") - self.assertIsInstance(exp.get_structure()["test"], String) - - @patch("neptune.new.internal.utils.source_code.sys.argv", ["main.py"]) - @patch("neptune.new.internal.init_run.os.path.isfile", new=lambda file: "." 
in file) - @patch( - "neptune.new.internal.utils.glob", - new=lambda path, recursive=False: [path.replace("*", "file.txt")], - ) - @patch( - "neptune.new.internal.utils.os.path.abspath", - new=lambda path: os.path.normpath("/home/user/main_dir/" + path), - ) - @patch("neptune.new.internal.utils.os.getcwd", new=lambda: "/home/user/main_dir") - @unittest.skipIf(IS_WINDOWS, "Linux/Mac test") - def test_entrypoint(self): - exp = init(mode="debug") - self.assertEqual(exp["source_code/entrypoint"].fetch(), "main.py") - - exp = init(mode="debug", source_files=[]) - self.assertEqual(exp["source_code/entrypoint"].fetch(), "main.py") - - exp = init(mode="debug", source_files=["../*"]) - self.assertEqual(exp["source_code/entrypoint"].fetch(), "main_dir/main.py") - - exp = init(mode="debug", source_files=["internal/*"]) - self.assertEqual(exp["source_code/entrypoint"].fetch(), "main.py") - - exp = init(mode="debug", source_files=["../other_dir/*"]) - self.assertEqual(exp["source_code/entrypoint"].fetch(), "../main_dir/main.py") - - @patch("neptune.new.internal.utils.source_code.sys.argv", ["main.py"]) - @patch("neptune.new.internal.utils.source_code.is_ipython", new=lambda: True) - def test_entrypoint_in_interactive_python(self): - exp = init(mode="debug") - with self.assertRaises(AttributeError): - exp["source_code/entrypoint"].get() - - exp = init(mode="debug", source_files=[]) - with self.assertRaises(AttributeError): - exp["source_code/entrypoint"].get() - - exp = init(mode="debug", source_files=["../*"]) - with self.assertRaises(AttributeError): - exp["source_code/entrypoint"].get() - - exp = init(mode="debug", source_files=["internal/*"]) - with self.assertRaises(AttributeError): - exp["source_code/entrypoint"].get() - - @patch("neptune.new.internal.utils.source_code.sys.argv", ["main.py"]) - @patch("neptune.new.internal.utils.source_code.get_common_root", new=lambda _: None) - @patch("neptune.new.internal.init_run.os.path.isfile", new=lambda file: "." 
in file) - @patch( - "neptune.new.internal.utils.glob", - new=lambda path, recursive=False: [path.replace("*", "file.txt")], - ) - @patch( - "neptune.new.internal.utils.os.path.abspath", - new=lambda path: os.path.normpath("/home/user/main_dir/" + path), - ) - def test_entrypoint_without_common_root(self): - exp = init(mode="debug", source_files=["../*"]) - self.assertEqual( - exp["source_code/entrypoint"].fetch(), "/home/user/main_dir/main.py" - ) - - exp = init(mode="debug", source_files=["internal/*"]) - self.assertEqual( - exp["source_code/entrypoint"].fetch(), "/home/user/main_dir/main.py" - ) - - def test_last_exp_is_raising_exception_when_non_initialized(self): - # given uninitialized run - Run.last_run = None - - # expect: raises NeptuneUninitializedException - with self.assertRaises(NeptuneUninitializedException): - get_last_run() - - def test_last_exp_is_the_latest_initialized(self): - # given two initialized runs - with init() as exp1, init() as exp2: - # expect: `neptune.latest_run` to be the latest initialized one - self.assertIsNot(exp1, get_last_run()) - self.assertIs(exp2, get_last_run()) - - -@patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_run", - new=lambda _, _id: ApiRun( - "12345678-1234-5678-1234-567812345678", - "SAN-94", - "workspace", - "sandbox", - False, - ), -) -@patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", - new=lambda _, _uuid, _type: [Attribute("test", AttributeType.STRING)], -) -@patch("neptune.new.internal.backends.factory.HostedNeptuneBackend", NeptuneBackendMock) -class TestClientProject(unittest.TestCase): - PROJECT_NAME = "organization/project" - - @classmethod - def setUpClass(cls) -> None: - os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS - - @classmethod - def setUp(cls) -> None: - if PROJECT_ENV_NAME in os.environ: - del os.environ[PROJECT_ENV_NAME] - - def test_incorrect_mode(self): - with self.assertRaises(ValueError): - 
init_project(name=self.PROJECT_NAME, mode="srtgj") - - def test_debug_mode(self): - project = init_project(name=self.PROJECT_NAME, mode="debug") - project["some/variable"] = 13 - self.assertEqual(13, project["some/variable"].fetch()) - self.assertNotIn(str(project._id), os.listdir(".neptune")) - - def test_offline_mode_for_project(self): - with self.assertRaises(NeptuneException): - init_project(name=self.PROJECT_NAME, mode="offline") - - def test_offline_mode_for_run(self): - run = init(name=self.PROJECT_NAME, mode="offline") - run["some/variable"] = 13 - with self.assertRaises(NeptuneOfflineModeFetchException): - run["some/variable"].fetch() - self.assertIn(str(run._id), os.listdir(".neptune/offline")) - self.assertIn("data-1.log", os.listdir(".neptune/offline/{}".format(run._id))) - - def test_sync_mode(self): - project = init_project(name=self.PROJECT_NAME, mode="sync") - project["some/variable"] = 13 - self.assertEqual(13, project["some/variable"].fetch()) - self.assertNotIn(str(project._id), os.listdir(".neptune")) - - def test_async_mode(self): - with init_project( - name=self.PROJECT_NAME, mode="async", flush_period=0.5 - ) as project: - project["some/variable"] = 13 - with self.assertRaises(MetadataInconsistency): - project["some/variable"].fetch() - project.wait() - self.assertEqual(13, project["some/variable"].fetch()) - self.assertIn(str(project._id), os.listdir(".neptune/async")) - execution_dir = os.listdir(".neptune/async/{}".format(project._id))[0] - self.assertIn( - "data-1.log", - os.listdir(".neptune/async/{}/{}".format(project._id, execution_dir)), - ) - - def test_no_project_name(self): - with self.assertRaises(NeptuneMissingProjectNameException): - init_project(mode="async") - - def test_inexistent_project(self): - with self.assertRaises(NeptuneMissingProjectNameException): - init_project(mode="async") - - def test_project_name_env_var(self): - os.environ[PROJECT_ENV_NAME] = self.PROJECT_NAME - - project = init_project(mode="sync") - 
project["some/variable"] = 13 - self.assertEqual(13, project["some/variable"].fetch()) - - @patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_run", - new=lambda _, _id: ApiRun( - "12345678-1234-5678-1234-567812345678", - "SAN-94", - "workspace", - "sandbox", - False, - ), - ) - @patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_attributes", - new=lambda _, _uuid, _type: [Attribute("some/variable", AttributeType.INT)], - ) - @patch( - "neptune.new.internal.backends.neptune_backend_mock.NeptuneBackendMock.get_int_attribute", - new=lambda _, _uuid, _type, _path: IntAttribute(42), - ) - def test_read_only_mode(self): - project = init_project(name=self.PROJECT_NAME, mode="read-only") - - with self.assertLogs() as caplog: - project["some/variable"] = 13 - project["some/other_variable"] = 11 - self.assertEqual( - caplog.output, - [ - "WARNING:neptune.new.internal.operation_processors.read_only_operation_processor:" - "Client in read-only mode, nothing will be saved to server." 
- ], - ) - - self.assertEqual(42, project["some/variable"].fetch()) - self.assertNotIn(str(project._id), os.listdir(".neptune")) - - @staticmethod - def build_attributes_leaderboard(now: datetime): - attributes = [] - attributes.append( - AttributeWithProperties( - "run/state", AttributeType.RUN_STATE, Mock(value="idle") - ) - ) - attributes.append( - AttributeWithProperties("float", AttributeType.FLOAT, Mock(value=12.5)) - ) - attributes.append( - AttributeWithProperties( - "string", AttributeType.STRING, Mock(value="some text") - ) - ) - attributes.append( - AttributeWithProperties("datetime", AttributeType.DATETIME, Mock(value=now)) - ) - attributes.append( - AttributeWithProperties( - "float/series", AttributeType.FLOAT_SERIES, Mock(last=8.7) - ) - ) - attributes.append( - AttributeWithProperties( - "string/series", AttributeType.STRING_SERIES, Mock(last="last text") - ) - ) - attributes.append( - AttributeWithProperties( - "string/set", AttributeType.STRING_SET, Mock(values=["a", "b"]) - ) - ) - attributes.append( - AttributeWithProperties( - "git/ref", - AttributeType.GIT_REF, - Mock(commit=Mock(commitId="abcdef0123456789")), - ) - ) - attributes.append(AttributeWithProperties("file", AttributeType.FILE, None)) - attributes.append( - AttributeWithProperties("file/set", AttributeType.FILE_SET, None) - ) - attributes.append( - AttributeWithProperties("image/series", AttributeType.IMAGE_SERIES, None) - ) - return attributes - - @patch.object(NeptuneBackendMock, "get_leaderboard") - def test_get_table_as_pandas(self, get_leaderboard): - # given - now = datetime.now() - attributes = self.build_attributes_leaderboard(now) - - # and - empty_entry = LeaderboardEntry(str(uuid.uuid4()), []) - filled_entry = LeaderboardEntry(str(uuid.uuid4()), attributes) - get_leaderboard.return_value = [empty_entry, filled_entry] - - # when - df = get_project(self.PROJECT_NAME).fetch_runs_table().to_pandas() - - # then - self.assertEqual("idle", df["run/state"][1]) - 
self.assertEqual(12.5, df["float"][1]) - self.assertEqual("some text", df["string"][1]) - self.assertEqual(now, df["datetime"][1]) - self.assertEqual(8.7, df["float/series"][1]) - self.assertEqual("last text", df["string/series"][1]) - self.assertEqual("a,b", df["string/set"][1]) - self.assertEqual("abcdef0123456789", df["git/ref"][1]) - - with self.assertRaises(KeyError): - self.assertTrue(df["file"]) - with self.assertRaises(KeyError): - self.assertTrue(df["file/set"]) - with self.assertRaises(KeyError): - self.assertTrue(df["image/series"]) - - @patch.object(NeptuneBackendMock, "get_leaderboard") - @patch.object(NeptuneBackendMock, "download_file") - @patch.object(NeptuneBackendMock, "download_file_set") - def test_get_table_as_runs(self, download_file_set, download_file, get_leaderboard): - # given - exp_id = str(uuid.uuid4()) - now = datetime.now() - attributes = self.build_attributes_leaderboard(now) - - # and - get_leaderboard.return_value = [LeaderboardEntry(exp_id, attributes)] - - # when - exp = get_project(self.PROJECT_NAME).fetch_runs_table().to_runs()[0] - - # then - self.assertEqual("idle", exp["run/state"].get()) - self.assertEqual("idle", exp["run"]["state"].get()) - self.assertEqual(12.5, exp["float"].get()) - self.assertEqual("some text", exp["string"].get()) - self.assertEqual(now, exp["datetime"].get()) - self.assertEqual(8.7, exp["float/series"].get()) - self.assertEqual("last text", exp["string/series"].get()) - self.assertEqual({"a", "b"}, exp["string/set"].get()) - self.assertEqual("abcdef0123456789", exp["git/ref"].get()) - - with self.assertRaises(MetadataInconsistency): - exp["file"].get() - with self.assertRaises(MetadataInconsistency): - exp["file/set"].get() - with self.assertRaises(MetadataInconsistency): - exp["image/series"].get() - - exp["file"].download("some_directory") - download_file.assert_called_with( - exp_id, ContainerType.RUN, ["file"], "some_directory" - ) - - exp["file/set"].download("some_directory") - 
download_file_set.assert_called_with( - exp_id, ContainerType.RUN, ["file", "set"], "some_directory" - ) diff --git a/tests/neptune/new/test_experiment.py b/tests/neptune/new/test_experiment.py new file mode 100644 index 000000000..1468a192a --- /dev/null +++ b/tests/neptune/new/test_experiment.py @@ -0,0 +1,247 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import unittest +from datetime import datetime + +from neptune.new import ( + ANONYMOUS, + Run, + init, + init_model, + init_project, + init_run, + init_model_version, +) +from neptune.new.envs import API_TOKEN_ENV_NAME, PROJECT_ENV_NAME +from neptune.new.exceptions import ( + InactiveModelException, + InactiveModelVersionException, + InactiveProjectException, + InactiveRunException, + MetadataInconsistency, + NeptuneProtectedPathException, +) +from neptune.new.metadata_containers import Model +from neptune.new.metadata_containers import ModelVersion +from neptune.new.metadata_containers import Project +from neptune.new.types.atoms.float import Float +from neptune.new.types.atoms.string import String +from neptune.new.types.series import FloatSeries, StringSeries + + +class TestExperiment(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + os.environ[PROJECT_ENV_NAME] = "organization/project" + os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS + + @classmethod + def get_experiments(cls, flush_period=None): + kwargs = {"mode": "debug"} + 
if flush_period is not None: + kwargs["flush_period"] = flush_period + + run1 = init(**kwargs) + run2 = init_run(**kwargs) + project = init_project(**kwargs) + model = init_model(key="MOD", **kwargs) + return [run1, run2, project, model] + + def test_define(self): + for exp in self.get_experiments(flush_period=0.5): + with self.subTest(msg=f"For type {exp.container_type}"): + exp.define("some/path/value", Float(5), wait=True) + self.assertEqual( + exp.get_structure()["some"]["path"]["value"].fetch(), 5 + ) + + def test_define_string(self): + for exp in self.get_experiments(flush_period=0.5): + with self.subTest(msg=f"For type {exp.container_type}"): + exp.define("some/path/value", String("Some string"), wait=True) + self.assertEqual( + exp.get_structure()["some"]["path"]["value"].fetch(), "Some string" + ) + + def test_define_few_variables(self): + for exp in self.get_experiments(flush_period=0.5): + with self.subTest(msg=f"For type {exp.container_type}"): + exp.define("some/path/num", Float(3)) + exp.define("some/path/text", String("Some text"), wait=True) + self.assertEqual(exp.get_structure()["some"]["path"]["num"].fetch(), 3) + self.assertEqual( + exp.get_structure()["some"]["path"]["text"].fetch(), "Some text" + ) + + def test_define_conflict(self): + for exp in self.get_experiments(flush_period=0.5): + with self.subTest(msg=f"For type {exp.container_type}"): + exp.define("some/path/value", Float(5)) + with self.assertRaises(MetadataInconsistency): + exp.define("some/path/value", Float(1)) + + def test_pop(self): + for exp in self.get_experiments(flush_period=0.5): + with self.subTest(msg=f"For type {exp.container_type}"): + exp.define("some/path/num", Float(3)) + exp.define("some/path/text", String("Some text")) + exp.pop("some/path/text") + self.assertTrue("num" in exp.get_structure()["some"]["path"]) + self.assertTrue("text" not in exp.get_structure()["some"]["path"]) + + def test_pop_namespace(self): + for exp in self.get_experiments(flush_period=0.5): + 
with self.subTest(msg=f"For type {exp.container_type}"): + exp.define("some/path/subpath/num", Float(3)) + exp.define("some/path/text", String("Some text")) + exp.define("some/otherpath", Float(4)) + exp.pop("some/path") + self.assertTrue("path" not in exp.get_structure()["some"]) + + def test_run_as_handler(self): + for exp in self.get_experiments(flush_period=0.5): + with self.subTest(msg=f"For type {exp.container_type}"): + exp.define("some/path/num", Float(3)) + exp.define("some/path/text", String("Some text")) + handler = exp["some/path"] + exp.wait() + self.assertEqual(handler["num"].fetch(), 3) + self.assertEqual(handler["text"].fetch(), "Some text") + + def test_assign_dict(self): + for exp in self.get_experiments(flush_period=0.5): + with self.subTest(msg=f"For type {exp.container_type}"): + now = datetime.now() + exp.assign( + { + "x": 5, + "metadata": {"name": "Trol", "age": 376}, + "toys": StringSeries(["cudgel", "hat"]), + "nested": {"nested": {"deep_secret": FloatSeries([13, 15])}}, + "simple_types": { + "int": 42, + "str": "imagine", + "float": 3.14, + "datetime": now, + "list": list(range(10)), + }, + } + ) + self.assertEqual(exp["x"].fetch(), 5) + self.assertEqual(exp["metadata/name"].fetch(), "Trol") + self.assertEqual(exp["metadata/age"].fetch(), 376) + self.assertEqual(exp["toys"].fetch_last(), "hat") + self.assertEqual(exp["nested/nested/deep_secret"].fetch_last(), 15) + self.assertEqual(exp["simple_types/int"].fetch(), 42) + self.assertEqual(exp["simple_types/str"].fetch(), "imagine") + self.assertEqual(exp["simple_types/float"].fetch(), 3.14) + self.assertEqual( + exp["simple_types/datetime"].fetch(), + now.replace(microsecond=1000 * int(now.microsecond / 1000)), + ) + self.assertEqual(exp["simple_types/list"].fetch(), str(list(range(10)))) + + def test_assign_false(self): + # https://github.com/neptune-ai/neptune-client/issues/555 + for exp in self.get_experiments(): + with self.subTest(msg=f"For type {exp.container_type}"): + exp["params"] 
= {"predictor.cheat": False} + + self.assertFalse(exp["params/predictor.cheat"].fetch()) + + def test_assign_copy(self): + now = datetime.now() + for exp in self.get_experiments(): + with self.subTest(msg=f"For type {exp.container_type}"): + test_values = [ + ("num", 42), + ("str", "Bat'leth"), + ("float", 63.2), + ("bool", True), + ( + "datetime", + now.replace(microsecond=1000 * int(now.microsecond / 1000)), + ), + ] + for attr_name, attr_value in test_values: + exp[f"some/path/{attr_name}"] = attr_value + exp[f"copied/{attr_name}"] = exp[f"some/path/{attr_name}"] + + exp.wait() + + for attr_name, attr_value in test_values: + self.assertEqual(attr_value, exp[f"copied/{attr_name}"].fetch()) + + def test_assign_copy_to_existing(self): + for exp in self.get_experiments(): + with self.subTest(msg=f"For type {exp.container_type}"): + exp["some/path/num"] = 42 + exp["copied/path"] = 54 + exp["copied/path"] = exp["some/path/num"] + exp.wait() + self.assertEqual(42, exp["copied/path"].fetch()) + + def test_assign_copy_from_nonexistent(self): + for exp in self.get_experiments(): + with self.subTest(msg=f"For type {exp.container_type}"): + with self.assertRaises(Exception) as e: + exp["copied/path"] = exp["some/path/num"] + self.assertEqual("NoneType doesn't support copying", str(e.exception)) + + def test_access_blocked_after_stop(self): + for exp in self.get_experiments(): + with self.subTest(msg=f"For type {exp.container_type}"): + exp["attr1"] = 1 + + exp.stop() + + if isinstance(exp, Run): + expected_exception = InactiveRunException + elif isinstance(exp, Project): + expected_exception = InactiveProjectException + elif isinstance(exp, Model): + expected_exception = InactiveModelException + elif isinstance(exp, ModelVersion): + expected_exception = InactiveModelVersionException + else: + raise ValueError(f"Not supported exp type: {type(exp)}") + + with self.assertRaises(expected_exception): + exp["attr1"].fetch() + with self.assertRaises(expected_exception): + 
exp["attr2"] = 2 + with self.assertRaises(expected_exception): + exp["series"].log(1) + + def test_protected_paths(self): + model = init_model(key="MOD", mode="debug") + model_version = init_model_version(model=model["sys/id"].fetch(), mode="debug") + with self.assertRaises(NeptuneProtectedPathException): + model_version["sys/stage"] = "production" + + model_version["tmp/placeholder"] = "production" + with self.assertRaises(NeptuneProtectedPathException): + model_version["sys/stage"] = model_version["tmp/placeholder"] + + with self.assertRaises(NeptuneProtectedPathException): + del model_version["sys/stage"] + + with self.assertRaises(NeptuneProtectedPathException): + model_version["sys"].pop("stage") + + with self.assertRaises(NeptuneProtectedPathException): + del model_version["sys"] diff --git a/tests/neptune/new/test_handler.py b/tests/neptune/new/test_handler.py index 8cd6c0fc9..e6ebe83b6 100644 --- a/tests/neptune/new/test_handler.py +++ b/tests/neptune/new/test_handler.py @@ -47,7 +47,7 @@ from neptune.new.types.series.float_series import FloatSeries as FloatSeriesVal from neptune.new.types.series.string_series import StringSeries as StringSeriesVal from neptune.new.types.sets.string_set import StringSet as StringSetVal -from tests.neptune.new.helpers import create_file +from tests.neptune.new.utils.file_helpers import create_file class TestHandler(unittest.TestCase): @@ -373,6 +373,7 @@ def test_lookup(self): self.assertEqual(ns["some/value"].fetch(), 3) def test_attribute_error(self): + # pylint: disable=no-member exp = init(mode="debug", flush_period=0.5) with self.assertRaises(AttributeError): exp["var"].something() diff --git a/tests/neptune/new/test_run.py b/tests/neptune/new/test_run.py deleted file mode 100644 index b587dfdff..000000000 --- a/tests/neptune/new/test_run.py +++ /dev/null @@ -1,169 +0,0 @@ -# -# Copyright (c) 2020, Neptune Labs Sp. z o.o. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os -import unittest -from datetime import datetime - -from neptune.new import ANONYMOUS, init -from neptune.new.envs import API_TOKEN_ENV_NAME, PROJECT_ENV_NAME -from neptune.new.exceptions import MetadataInconsistency, InactiveRunException -from neptune.new.types.atoms.float import Float -from neptune.new.types.atoms.string import String -from neptune.new.types.series import FloatSeries, StringSeries - - -class TestRun(unittest.TestCase): - @classmethod - def setUpClass(cls) -> None: - os.environ[PROJECT_ENV_NAME] = "organization/project" - os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS - - def test_define(self): - exp = init(mode="debug", flush_period=0.5) - exp.define("some/path/value", Float(5), wait=True) - self.assertEqual(exp.get_structure()["some"]["path"]["value"].fetch(), 5) - - def test_define_string(self): - exp = init(mode="debug", flush_period=0.5) - exp.define("some/path/value", String("Some string"), wait=True) - self.assertEqual( - exp.get_structure()["some"]["path"]["value"].fetch(), "Some string" - ) - - def test_define_few_variables(self): - exp = init(mode="debug", flush_period=0.5) - exp.define("some/path/num", Float(3)) - exp.define("some/path/text", String("Some text"), wait=True) - self.assertEqual(exp.get_structure()["some"]["path"]["num"].fetch(), 3) - self.assertEqual( - exp.get_structure()["some"]["path"]["text"].fetch(), "Some text" - ) - - def test_define_conflict(self): - exp = 
init(mode="debug", flush_period=0.5) - exp.define("some/path/value", Float(5)) - with self.assertRaises(MetadataInconsistency): - exp.define("some/path/value", Float(1)) - - def test_pop(self): - exp = init(mode="debug", flush_period=0.5) - exp.define("some/path/num", Float(3)) - exp.define("some/path/text", String("Some text")) - exp.pop("some/path/text") - self.assertTrue("num" in exp.get_structure()["some"]["path"]) - self.assertTrue("text" not in exp.get_structure()["some"]["path"]) - - def test_pop_namespace(self): - exp = init(mode="debug", flush_period=0.5) - exp.define("some/path/subpath/num", Float(3)) - exp.define("some/path/text", String("Some text")) - exp.define("some/otherpath", Float(4)) - exp.pop("some/path") - self.assertTrue("path" not in exp.get_structure()["some"]) - - def test_run_as_handler(self): - exp = init(mode="debug", flush_period=0.5) - exp.define("some/path/num", Float(3)) - exp.define("some/path/text", String("Some text")) - handler = exp["some/path"] - exp.wait() - self.assertEqual(handler["num"].fetch(), 3) - self.assertEqual(handler["text"].fetch(), "Some text") - - def test_assign_dict(self): - exp = init(mode="debug", flush_period=0.5) - now = datetime.now() - exp.assign( - { - "x": 5, - "metadata": {"name": "Trol", "age": 376}, - "toys": StringSeries(["cudgel", "hat"]), - "nested": {"nested": {"deep_secret": FloatSeries([13, 15])}}, - "simple_types": { - "int": 42, - "str": "imagine", - "float": 3.14, - "datetime": now, - "list": list(range(10)), - }, - } - ) - self.assertEqual(exp["x"].fetch(), 5) - self.assertEqual(exp["metadata/name"].fetch(), "Trol") - self.assertEqual(exp["metadata/age"].fetch(), 376) - self.assertEqual(exp["toys"].fetch_last(), "hat") - self.assertEqual(exp["nested/nested/deep_secret"].fetch_last(), 15) - self.assertEqual(exp["simple_types/int"].fetch(), 42) - self.assertEqual(exp["simple_types/str"].fetch(), "imagine") - self.assertEqual(exp["simple_types/float"].fetch(), 3.14) - self.assertEqual( - 
exp["simple_types/datetime"].fetch(), - now.replace(microsecond=1000 * int(now.microsecond / 1000)), - ) - self.assertEqual(exp["simple_types/list"].fetch(), str(list(range(10)))) - - def test_assign_false(self): - # https://github.com/neptune-ai/neptune-client/issues/555 - exp = init(mode="debug") - exp["params"] = {"predictor.cheat": False} - - self.assertFalse(exp["params/predictor.cheat"].fetch()) - - def test_assign_copy(self): - exp = init(mode="debug") - now = datetime.now() - test_values = [ - ("num", 42), - ("str", "Bat'leth"), - ("float", 63.2), - ("bool", True), - ("datetime", now.replace(microsecond=1000 * int(now.microsecond / 1000))), - ] - for attr_name, attr_value in test_values: - exp[f"some/path/{attr_name}"] = attr_value - exp[f"copied/{attr_name}"] = exp[f"some/path/{attr_name}"] - - exp.wait() - - for attr_name, attr_value in test_values: - self.assertEqual(attr_value, exp[f"copied/{attr_name}"].fetch()) - - def test_assign_copy_to_existing(self): - exp = init(mode="debug") - exp["some/path/num"] = 42 - exp["copied/path"] = 54 - exp["copied/path"] = exp["some/path/num"] - exp.wait() - self.assertEqual(42, exp["copied/path"].fetch()) - - def test_assign_copy_from_nonexistent(self): - exp = init(mode="debug") - with self.assertRaises(Exception) as e: - exp["copied/path"] = exp["some/path/num"] - self.assertEqual("NoneType doesn't support copying", str(e.exception)) - - def test_access_blocked_after_stop(self): - exp = init(mode="debug") - exp["attr1"] = 1 - - exp.stop() - - with self.assertRaises(InactiveRunException): - exp["attr1"].fetch() - with self.assertRaises(InactiveRunException): - exp["attr2"] = 2 - with self.assertRaises(InactiveRunException): - exp["series"].log(1) diff --git a/tests/neptune/new/test_sync.py b/tests/neptune/new/test_sync.py deleted file mode 100644 index 168047d42..000000000 --- a/tests/neptune/new/test_sync.py +++ /dev/null @@ -1,387 +0,0 @@ -# -# Copyright (c) 2020, Neptune Labs Sp. z o.o. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# pylint: disable=redefined-outer-name - -import os -import random -import string -import threading -import uuid -from random import randint - -import pytest - -import neptune.new.sync -from neptune.new.constants import OFFLINE_DIRECTORY -from neptune.new.exceptions import ProjectNotFound -from neptune.new.internal.backends.api_model import Project -from neptune.new.internal.container_type import ContainerType -from neptune.new.internal.containers.disk_queue import DiskQueue -from neptune.new.internal.operation import Operation -from neptune.new.internal.utils.sync_offset_file import SyncOffsetFile -from neptune.new.sync import ( - ApiRun, - get_project, - get_qualified_name, - sync_all_runs, - sync_selected_runs, - synchronization_status, -) - - -def a_run(): - return ApiRun( - str(uuid.uuid4()), "RUN-{}".format(randint(42, 12342)), "org", "proj", False - ) - - -def a_project(): - return ApiRun( - str(uuid.uuid4()), - "".join((random.choice(string.ascii_letters).upper() for _ in range(3))), - "org", - "proj", - False, - ) - - -def generate_get_run_impl(registered_experiments): - def get_run_impl(run_id): - """This function will return run as well as projects. 
Will be cleaned in ModelRegistry""" - for exp in registered_experiments: - if run_id in (str(exp.id), get_qualified_name(exp)): - return exp - - return get_run_impl - - -def prepare_projects(path): - unsync_project = a_project() - sync_project = a_project() - registered_projects = (unsync_project, sync_project) - - execution_id = "exec-0" - - for project in registered_projects: - project_path = path / "async" / str(project.id) / execution_id - project_path.mkdir(parents=True) - queue = DiskQueue( - project_path, - lambda x: x, - lambda x: x, - threading.RLock(), - ContainerType.PROJECT, - ) - queue.put("op-proj-0") - queue.put("op-proj-1") - - SyncOffsetFile( - path / "async" / str(unsync_project.id) / execution_id / "last_ack_version" - ).write(1) - SyncOffsetFile( - path / "async" / str(unsync_project.id) / execution_id / "last_put_version" - ).write(2) - - SyncOffsetFile( - path / "async" / str(sync_project.id) / execution_id / "last_ack_version" - ).write(2) - SyncOffsetFile( - path / "async" / str(sync_project.id) / execution_id / "last_put_version" - ).write(2) - - return unsync_project, sync_project, generate_get_run_impl(registered_projects) - - -def prepare_runs(path): - unsync_exp = a_run() - sync_exp = a_run() - registered_runs = (unsync_exp, sync_exp) - - execution_id = "exec-0" - - for exp in registered_runs: - exp_path = path / "async" / str(exp.id) / execution_id - exp_path.mkdir(parents=True) - queue = DiskQueue( - exp_path, lambda x: x, lambda x: x, threading.RLock(), ContainerType.RUN - ) - queue.put("op-0") - queue.put("op-1") - - SyncOffsetFile( - path / "async" / str(unsync_exp.id) / execution_id / "last_ack_version" - ).write(1) - SyncOffsetFile( - path / "async" / str(unsync_exp.id) / execution_id / "last_put_version" - ).write(2) - - SyncOffsetFile( - path / "async" / str(sync_exp.id) / execution_id / "last_ack_version" - ).write(2) - SyncOffsetFile( - path / "async" / str(sync_exp.id) / execution_id / "last_put_version" - ).write(2) - - 
return unsync_exp, sync_exp, generate_get_run_impl(registered_runs) - - -def prepare_offline_run(path): - offline_exp_uuid = str(uuid.uuid4()) - offline_exp_path = path / OFFLINE_DIRECTORY / offline_exp_uuid - offline_exp_path.mkdir(parents=True) - - queue = DiskQueue( - offline_exp_path, lambda x: x, lambda x: x, threading.RLock(), ContainerType.RUN - ) - queue.put("op-0") - queue.put("op-1") - SyncOffsetFile( - path / OFFLINE_DIRECTORY / offline_exp_uuid / "last_put_version" - ).write(2) - - return offline_exp_uuid - - -def test_list_projects(tmp_path, mocker, capsys): - """TODO: we're mentioning projects as runs, will be improved with ModelRegistry""" - # given - unsync_proj, sync_proj, get_exp_impl = prepare_projects(tmp_path) - offline_exp_uuid = prepare_offline_run(tmp_path) - - # and - mocker.patch.object(neptune.new.sync, "get_run", get_exp_impl) - mocker.patch.object(Operation, "from_dict") - - # when - synchronization_status(tmp_path) - - # then - captured = capsys.readouterr() - assert captured.err == "" - assert ( - "Synchronized runs:\n- {}".format(get_qualified_name(sync_proj)) in captured.out - ) - assert ( - "Unsynchronized runs:\n- {}".format(get_qualified_name(unsync_proj)) - in captured.out - ) - assert ( - "Unsynchronized offline runs:\n- offline/{}".format(offline_exp_uuid) - in captured.out - ) - - -def test_list_runs(tmp_path, mocker, capsys): - # given - unsync_exp, sync_exp, get_run_impl = prepare_runs(tmp_path) - offline_exp_uuid = prepare_offline_run(tmp_path) - - # and - mocker.patch.object(neptune.new.sync, "get_run", get_run_impl) - mocker.patch.object(Operation, "from_dict") - - # when - synchronization_status(tmp_path) - - # then - captured = capsys.readouterr() - assert captured.err == "" - assert ( - "Synchronized runs:\n- {}".format(get_qualified_name(sync_exp)) in captured.out - ) - assert ( - "Unsynchronized runs:\n- {}".format(get_qualified_name(unsync_exp)) - in captured.out - ) - assert ( - "Unsynchronized offline runs:\n- 
offline/{}".format(offline_exp_uuid) - in captured.out - ) - - -def test_list_runs_when_no_run(tmp_path, capsys): - (tmp_path / "async").mkdir() - # when - with pytest.raises(SystemExit): - synchronization_status(tmp_path) - - # then - captured = capsys.readouterr() - assert captured.err == "" - assert "There are no Neptune runs" in captured.out - - -def test_sync_all_runs(tmp_path, mocker, capsys): - # given - unsync_proj, sync_proj, _ = prepare_projects(tmp_path) - unsync_exp, sync_exp, _ = prepare_runs(tmp_path) - get_run_impl = generate_get_run_impl((unsync_proj, sync_proj, unsync_exp, sync_exp)) - offline_exp_uuid = prepare_offline_run(tmp_path) - registered_offline_run = a_run() - - # and - mocker.patch.object(neptune.new.sync, "get_run", get_run_impl) - mocker.patch.object(neptune.new.sync, "backend") - mocker.patch.object(neptune.new.sync.backend, "execute_operations") - mocker.patch.object( - neptune.new.sync.backend, - "get_project", - lambda _: Project(str(uuid.uuid4()), "project", "workspace"), - ) - mocker.patch.object( - neptune.new.sync, - "register_offline_run", - lambda project, container_type: (registered_offline_run, True), - ) - mocker.patch.object(Operation, "from_dict", lambda x: x) - neptune.new.sync.backend.execute_operations.return_value = (1, []) - - # when - sync_all_runs(tmp_path, "foo") - - # then - captured = capsys.readouterr() - assert captured.err == "" - assert ( - "Offline run {} registered as {}".format( - offline_exp_uuid, get_qualified_name(registered_offline_run) - ) - ) in captured.out - assert "Synchronising {}".format(get_qualified_name(unsync_exp)) in captured.out - assert "Synchronising {}".format(get_qualified_name(unsync_proj)) in captured.out - assert ( - "Synchronization of run {} completed.".format(get_qualified_name(unsync_exp)) - in captured.out - ) - assert ( - "Synchronization of project {} completed.".format( - get_qualified_name(unsync_proj) - ) - in captured.out - ) - assert "Synchronising 
{}".format(get_qualified_name(sync_exp)) not in captured.out - assert "Synchronising {}".format(get_qualified_name(sync_proj)) not in captured.out - - # and - # pylint: disable=no-member - neptune.new.sync.backend.execute_operations.has_calls( - [ - mocker.call(unsync_exp.id, ContainerType.RUN, ["op-1"]), - mocker.call(registered_offline_run.id, ContainerType.RUN, ["op-1"]), - mocker.call(unsync_proj.id, ContainerType.PROJECT, ["op-proj-1"]), - ], - any_order=True, - ) - - -def test_sync_selected_runs(tmp_path, mocker, capsys): - # given - unsync_exp, sync_exp, get_run_impl = prepare_runs(tmp_path) - offline_exp_uuid = prepare_offline_run(tmp_path) - registered_offline_exp = a_run() - - def get_run_impl_(run_id: str): - if run_id in ( - str(registered_offline_exp.id), - get_qualified_name(registered_offline_exp), - ): - return registered_offline_exp - else: - return get_run_impl(run_id) - - # and - mocker.patch.object(neptune.new.sync, "get_run", get_run_impl_) - mocker.patch.object(neptune.new.sync, "backend") - mocker.patch.object(neptune.new.sync.backend, "execute_operations") - mocker.patch.object( - neptune.new.sync.backend, - "get_project", - lambda _: Project(str(uuid.uuid4()), "project", "workspace"), - ) - mocker.patch.object( - neptune.new.sync, - "register_offline_run", - lambda project, container_type: (registered_offline_exp, True), - ) - mocker.patch.object(Operation, "from_dict", lambda x: x) - neptune.new.sync.backend.execute_operations.return_value = (2, []) - - # when - sync_selected_runs( - tmp_path, - "some-name", - [get_qualified_name(sync_exp), "offline/" + offline_exp_uuid], - ) - - # then - captured = capsys.readouterr() - assert captured.err == "" - assert "Synchronising {}".format(get_qualified_name(sync_exp)) in captured.out - assert ( - "Synchronization of run {} completed.".format(get_qualified_name(sync_exp)) - in captured.out - ) - assert ( - "Synchronising {}".format(get_qualified_name(registered_offline_exp)) - in captured.out - ) - 
assert ( - "Synchronization of run {} completed.".format( - get_qualified_name(registered_offline_exp) - ) - in captured.out - ) - assert "Synchronising {}".format(get_qualified_name(unsync_exp)) not in captured.out - - # and - # pylint: disable=no-member - neptune.new.sync.backend.execute_operations.assert_called_with( - registered_offline_exp.id, ContainerType.RUN, operations=["op-0", "op-1"] - ) - - -def test_get_project_no_name_set(mocker): - # given - mocker.patch.object(os, "getenv") - os.getenv.return_value = None - - # expect - assert get_project(None) is None - - -def test_get_project_project_not_found(mocker): - # given - mocker.patch.object(neptune.new.sync, "backend") - mocker.patch.object(neptune.new.sync.backend, "get_project") - neptune.new.sync.backend.get_project.side_effect = ProjectNotFound("foo") - - # expect - assert get_project("foo") is None - - -def test_sync_non_existent_run(tmp_path, mocker, capsys): - # given - mocker.patch.object(neptune.new.sync, "get_project") - mocker.patch.object(neptune.new.sync, "get_run") - neptune.new.sync.get_run.return_value = a_run() - - # when - sync_selected_runs(tmp_path, "foo", ["bar"]) - - # then - captured = capsys.readouterr() - assert "Warning: Run 'bar' does not exist in location" in captured.err diff --git a/tests/neptune/new/utils/__init__.py b/tests/neptune/new/utils/__init__.py new file mode 100644 index 000000000..d71b3273e --- /dev/null +++ b/tests/neptune/new/utils/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright (c) 2021, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/tests/neptune/new/utils/api_experiments_factory.py b/tests/neptune/new/utils/api_experiments_factory.py new file mode 100644 index 000000000..533b54434 --- /dev/null +++ b/tests/neptune/new/utils/api_experiments_factory.py @@ -0,0 +1,88 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +__all__ = [ + "api_metadata_container", + "api_run", + "api_model", + "api_project", + "api_model_version", +] + +import random +import string +import uuid +from random import randint + +from neptune.new.internal.backends.api_model import ApiExperiment +from neptune.new.internal.container_type import ContainerType +from neptune.new.internal.id_formats import SysId, UniqueId + + +def api_metadata_container(container_type: ContainerType) -> ApiExperiment: + if container_type == ContainerType.PROJECT: + return api_project() + elif container_type == ContainerType.RUN: + return api_run() + elif container_type == ContainerType.MODEL: + return api_model() + elif container_type == ContainerType.MODEL_VERSION: + return api_model_version() + else: + raise ValueError(f"Unknown container_type: {container_type.value}") + + +def api_project() -> ApiExperiment: + return _api_metadata_container( + sys_id=_random_key(), container_type=ContainerType.PROJECT + ) + + +def api_run() -> ApiExperiment: + return _api_metadata_container( + 
sys_id=f"{_random_key()}-{randint(42, 12342)}", container_type=ContainerType.RUN + ) + + +def api_model() -> ApiExperiment: + return _api_metadata_container( + sys_id=f"{_random_key()}-{_random_key()}", container_type=ContainerType.MODEL + ) + + +def api_model_version() -> ApiExperiment: + return _api_metadata_container( + sys_id=f"{_random_key()}-{_random_key()}-{randint(42, 12342)}", + container_type=ContainerType.MODEL_VERSION, + ) + + +def _api_metadata_container( + sys_id: str, container_type: ContainerType +) -> ApiExperiment: + return ApiExperiment( + id=UniqueId(str(uuid.uuid4())), + type=container_type, + sys_id=SysId(sys_id), + workspace="workspace", + project_name="sandbox", + trashed=False, + ) + + +def _random_key(key_length: int = 3) -> str: + return "".join( + random.choice(string.ascii_letters).upper() for _ in range(key_length) + ) diff --git a/tests/neptune/new/helpers.py b/tests/neptune/new/utils/file_helpers.py similarity index 100% rename from tests/neptune/new/helpers.py rename to tests/neptune/new/utils/file_helpers.py diff --git a/tests/neptune/test_imports.py b/tests/neptune/test_imports.py index 90f1d97e8..ceac3cb0c 100644 --- a/tests/neptune/test_imports.py +++ b/tests/neptune/test_imports.py @@ -300,8 +300,6 @@ from neptune.new.handler import FileVal from neptune.new.handler import FloatSeries from neptune.new.handler import Handler -from neptune.new.handler import Iterable -from neptune.new.handler import List from neptune.new.handler import NeptuneException from neptune.new.handler import StringSeries from neptune.new.handler import StringSet @@ -311,15 +309,10 @@ from neptune.new.integrations.python_logger import RunState from neptune.new.logging.logger import Logger from neptune.new.logging.logger import Run -from neptune.new.project import Iterable from neptune.new.project import Project -from neptune.new.project import RunsTable from neptune.new.run import Attribute -from neptune.new.run import AttributeType -from neptune.new.run 
import BackgroundJob from neptune.new.run import Boolean from neptune.new.run import Datetime -from neptune.new.run import DeleteAttribute from neptune.new.run import Float from neptune.new.run import Handler from neptune.new.run import InactiveRunException @@ -328,32 +321,22 @@ from neptune.new.run import Namespace from neptune.new.run import NamespaceAttr from neptune.new.run import NamespaceBuilder -from neptune.new.run import NeptuneBackend from neptune.new.run import NeptunePossibleLegacyUsageException -from neptune.new.run import OperationProcessor from neptune.new.run import Run from neptune.new.run import RunState -from neptune.new.run import RunStructure from neptune.new.run import String from neptune.new.run import Value -from neptune.new.run import ValueToAttributeVisitor from neptune.new.runs_table import AttributeType from neptune.new.runs_table import AttributeWithProperties -from neptune.new.runs_table import Dict from neptune.new.runs_table import LeaderboardEntry from neptune.new.runs_table import LeaderboardHandler -from neptune.new.runs_table import List from neptune.new.runs_table import MetadataInconsistency from neptune.new.runs_table import RunsTable from neptune.new.runs_table import RunsTableEntry -from neptune.new.runs_table import datetime -from neptune.new.sync import ApiRun +from neptune.new.sync import ApiExperiment from neptune.new.sync import CannotSynchronizeOfflineRunsWithoutProject -from neptune.new.sync import Credentials from neptune.new.sync import DiskQueue from neptune.new.sync import HostedNeptuneBackend -from neptune.new.sync import Iterable -from neptune.new.sync import List from neptune.new.sync import NeptuneBackend from neptune.new.sync import NeptuneConnectionLostException from neptune.new.sync import NeptuneException @@ -362,8 +345,6 @@ from neptune.new.sync import Project from neptune.new.sync import ProjectNotFound from neptune.new.sync import RunNotFound -from neptune.new.sync import Sequence -from neptune.new.sync 
import Tuple from neptune.new.types.atoms.artifact import Artifact from neptune.new.types.atoms.artifact import Atom from neptune.new.types.atoms.artifact import FileHasher @@ -462,7 +443,6 @@ from neptune.projects import Experiment from neptune.projects import NeptuneNoExperimentContextException from neptune.projects import Project -from neptune.sessions import HostedNeptuneBackendApiClient from neptune.sessions import OrderedDict from neptune.sessions import Project from neptune.sessions import Session