From 1ca2ac405ac1953b213545500066e2417445ea2f Mon Sep 17 00:00:00 2001 From: Simon Edwardsson Date: Mon, 6 Jul 2020 16:32:10 +0100 Subject: [PATCH] fixes for importing into workflows --- darwin/dataset/remote_dataset.py | 2 +- darwin/importer/formats/csvtags.py | 2 ++ darwin/importer/importer.py | 7 +++++-- docs/torch.md | 2 +- setup.py | 2 +- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index 99ceac7e9..4a0674e0c 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -264,7 +264,7 @@ def fetch_remote_files(self): base_url = f"/datasets/{self.dataset_id}/items" if not self.client.feature_enabled("WORKFLOW", self.team): base_url = f"/datasets/{self.dataset_id}/dataset_images" - cursor = "" + cursor = "?page[size]=500" while True: response = self.client.get(f"{base_url}{cursor}", team=self.team) yield from response["items"] diff --git a/darwin/importer/formats/csvtags.py b/darwin/importer/formats/csvtags.py index a6dfec62a..4ac294ccf 100644 --- a/darwin/importer/formats/csvtags.py +++ b/darwin/importer/formats/csvtags.py @@ -14,6 +14,8 @@ def parse_file(path: Path) -> Optional[List[dt.AnnotationFile]]: reader = csv.reader(f) for row in reader: filename, *tags = map(lambda s: s.strip(), row) + if filename == "": + continue annotations = [dt.make_tag(tag) for tag in tags if len(tag) > 0] annotation_classes = set([annotation.annotation_class for annotation in annotations]) files.append(dt.AnnotationFile(path, filename, annotation_classes, annotations)) diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 1a9de36db..0e31ebd8b 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -1,5 +1,6 @@ from pathlib import Path from typing import Callable, List, Union +from tqdm import tqdm import darwin.datatypes as dt from darwin.utils import secure_continue_request @@ -93,7 +94,7 @@ def import_annotations( parsed_files = importer(local_path) if type(parsed_files) is not list: parsed_files = [parsed_files] - for parsed_file in parsed_files: + for parsed_file in tqdm(parsed_files): image_id = remote_files[parsed_file.filename] _import_annotations(dataset.client, image_id, remote_classes, parsed_file.annotations, dataset) @@ -108,6 +109,8 @@ def _import_annotations(client: "Client", id: int, remote_classes, annotations, ) if client.feature_enabled("WORKFLOW", dataset.team): - client.post(f"/items/{id}/import", payload={"annotations": serialized_annotations}) + res = client.post(f"/dataset_items/{id}/import", payload={"annotations": serialized_annotations}) + if res["status_code"] != 200: + print(f"warning, failed to upload annotation to {id}") else: client.post(f"/dataset_images/{id}/import", payload={"annotations": serialized_annotations}) diff --git a/docs/torch.md b/docs/torch.md index d87bde045..f4b973ac0 100644 --- a/docs/torch.md +++ b/docs/torch.md @@ -158,7 +158,7 @@ for epoch in range(10): ``` -## Darwin છ Detectron2 +## Darwin ✕ Detectron2 This tutorial shows how to train [Detectron2](https://github.com/facebookresearch/detectron2) models in your Darwin datasets. If you do not have Detectron2 installed yet, please follow these [installation instructions](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md). diff --git a/setup.py b/setup.py index 284cad84d..00d97636a 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="darwin-py", - version="0.5.5", + version="0.5.6", author="V7", author_email="info@v7labs.com", description="Library and command line interface for darwin.v7labs.com",