diff --git a/.github/hub/requirements.txt b/.github/hub/requirements.txt
deleted file mode 100644
index 0dd1e1e4..00000000
--- a/.github/hub/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-GitPython==3.1.30
-python-dotenv==0.19.2
-requests==2.25.1
-tqdm==4.62.3
diff --git a/.github/hub/update_hub_repositories.py b/.github/hub/update_hub_repositories.py
deleted file mode 100644
index d6bfb105..00000000
--- a/.github/hub/update_hub_repositories.py
+++ /dev/null
@@ -1,324 +0,0 @@
-# adapted from https://github.com/huggingface/datasets/blob/master/.github/hub/update_hub_repositories.py
-
-import base64
-import distutils.dir_util
-import logging
-import os
-import re
-import sys
-from itertools import islice
-from pathlib import Path
-from typing import Dict, Optional, Set, Tuple
-
-import requests
-from dotenv import load_dotenv
-from git import Repo
-from tqdm.contrib.concurrent import thread_map
-
-load_dotenv()
-logger = logging.getLogger(__name__)
-ROOT = Path()
-
-# General environment variables accepted values for booleans
-ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
-ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
-
-if os.environ.get("HF_USE_PROD", "AUTO") in ENV_VARS_TRUE_VALUES:
-    HUB_ENDPOINT = "https://huggingface.co"
-else:
-    HUB_ENDPOINT = "https://moon-staging.huggingface.co"
-
-HUB_CANONICAL_WHOAMI = HUB_ENDPOINT + "/api/whoami-v2"
-HUB_CANONICAL_CREATE_URL = HUB_ENDPOINT + "/api/repos/create"
-HUB_CANONICAL_INFO_URL = HUB_ENDPOINT + "/api/datasets/{organization}/{dataset_name}"
-HUB_CANONICAL_DATASET_GIT_URL = (
-    HUB_ENDPOINT.replace("https://", "https://user:{token}@")
-    + "/datasets/{organization}/{dataset_name}.git"
-)
-HUB_API_GH_TO_HF = HUB_ENDPOINT + "/api/gh-to-hf/{github_username}"
-DATASETS_LIB_CATALOG_DIR_NAME = "dataset_builders"
-DATASETS_LIB_COMMIT_URL = "https://github.com/huggingface/datasets/{organization}/commit/{hexsha}"
-CANONICAL_DATASET_REPO_MAIN_BRANCH = "main"
-HUB_DIR_NAME = "hub"
-
-
-def hf_retrieve_author(author_name, author_email) -> Tuple[str, str]:
-    # Some HF members have enabled email address privacy on GitHub
-    # This is here just to be able to link the commits to their HF accounts
-    if author_email.endswith("@users.noreply.github.com"):
-        try:
-            github_username = author_email[: -len("@users.noreply.github.com")].split("+", 1)[-1]
-            response = requests.get(HUB_API_GH_TO_HF.format(github_username=github_username))
-            author_email = response.json()["user"] + "@users.noreply.huggingface.co"
-        except Exception:
-            pass
-    return author_name, author_email
-
-
-class UnauthorizedError(ConnectionError):
-    pass
-
-
-class UpdateFailed(RuntimeError):
-    pass
-
-
-def src_canonical_dataset_path(datasets_lib_path: Path, dataset_name: str) -> Path:
-    return datasets_lib_path / DATASETS_LIB_CATALOG_DIR_NAME / dataset_name
-
-
-def canonical_dataset_path(dataset_name: str) -> Path:
-    return ROOT / HUB_DIR_NAME / dataset_name
-
-
-def canonical_dataset_git_url(dataset_name: str, token: str, organization: str) -> str:
-    return HUB_CANONICAL_DATASET_GIT_URL.format(
-        dataset_name=dataset_name, token=token, organization=organization
-    )
-
-
-def canonical_dataset_info_url(dataset_name: str, organization: str) -> str:
-    return HUB_CANONICAL_INFO_URL.format(dataset_name=dataset_name, organization=organization)
-
-
-def create_remote_repo(dataset_name: str, token: str, organization: str):
-    response = requests.post(
-        HUB_CANONICAL_CREATE_URL,
-        headers={"authorization": f"Bearer {token}"},
-        json={
-            "name": dataset_name,
-            "organization": organization,
-            # "canonical": True,
-            "type": "dataset",
-        },
-    )
-    response.raise_for_status()
-
-
-def whoami(token: str) -> str:
-    response = requests.get(HUB_CANONICAL_WHOAMI, headers={"authorization": f"Bearer {token}"})
-    response.raise_for_status()
-    user_info = response.json()
-    return user_info
-
-
-def check_authorizations(user_info: dict, organization: str):
-    if organization not in [org["name"] for org in user_info["orgs"] if org["type"] == "org"]:
-        raise UnauthorizedError(
-            f"User {user_info['name']} is not part of the 'trusted-committers' org: "
-            "it can't push to canonical repositories"
-        )
-
-
-def apply_hacks_for_moon_landing(dataset_repo_path: Path):
-    if (dataset_repo_path / "README.md").is_file():
-        with (dataset_repo_path / "README.md").open() as f:
-            readme_content = f.read()
-        if readme_content.count("---\n") > 1:
-            _, tags, content = readme_content.split("---\n", 2)
-            tags = tags.replace("\nlicense:", "\nlicenses:").replace(".", "-").replace("$", "%")
-            with (dataset_repo_path / "README.md").open("w") as f:
-                f.write("---\n".join(["", tags, content]))
-
-
-class update_main:
-    def __init__(
-        self,
-        organization: str,
-        datasets_lib_path: str,
-        commit_args: Tuple[str],
-        token: str,
-        deleted_files: Dict[str, Set[str]],
-        tag_name: Optional[str] = None,
-    ) -> None:
-        self.organization = organization
-        self.datasets_lib_path = datasets_lib_path
-        self.commit_args = commit_args
-        self.token = token
-        self.deleted_files = (
-            deleted_files  # dict dataset_name -> set of relative paths of the deleted files
-        )
-        self.tag_name = tag_name
-
-    def __call__(self, dataset_name: str) -> bool:
-        try:
-            create_remote_repo(dataset_name, self.token, self.organization)
-        except requests.exceptions.HTTPError as e:
-            if "409 Client Error: Conflict for url:" not in repr(
-                e
-            ):  # don't log if repo already exists
-                logger.warning(f"[{dataset_name}] " + repr(e))
-        if not canonical_dataset_path(dataset_name).is_dir():
-            repo = Repo.clone_from(
-                canonical_dataset_git_url(dataset_name, self.token, self.organization),
-                to_path=canonical_dataset_path(dataset_name),
-            )
-        else:
-            repo = Repo(canonical_dataset_path(dataset_name))
-
-        logs = []
-        logs.append(repo.git.reset("--hard"))
-        logs.append(repo.git.clean("-f", "-d"))
-        logs.append(repo.git.checkout(CANONICAL_DATASET_REPO_MAIN_BRANCH))
-        logs.append(repo.remote().pull())
-        # Copy the changes and commit
-        distutils.dir_util.copy_tree(
-            str(src_canonical_dataset_path(datasets_lib_path, dataset_name)),
-            str(canonical_dataset_path(dataset_name)),
-        )
-        for filepath_to_delete in self.deleted_files.get(dataset_name, []):
-            try:
-                (canonical_dataset_path(dataset_name) / filepath_to_delete).unlink()
-            except Exception as e:
-                logger.warning(
-                    f"[{dataset_name}] Couldn't delete file at {filepath_to_delete}: {repr(e)}"
-                )
-        apply_hacks_for_moon_landing(canonical_dataset_path(dataset_name))
-        logs.append(repo.git.add("."))
-        if "Changes to be committed:" in repo.git.status():
-            logs.append(repo.git.commit(*self.commit_args))
-            try:
-                logs.append(repo.git.push())
-                if self.tag_name:
-                    # If the dataset repository hasn't been tagged for this release yet,
-                    # it means that the new version of the datasets lib just got released.
-                    # In this case we have to tag the new commit with this release name
-                    logs.append(
-                        repo.git.tag(
-                            self.tag_name, f"-m Add tag from dataset_builders {self.tag_name}"
-                        )
-                    )
-                    logs.append(repo.git.push("--tags"))
-            except Exception as e:
-                logs.append("push failed !")
-                logs.append(repr(e))
-        if "Your branch is up to date with" not in repo.git.status():
-            logs.append(repo.git.status())
-            logs = "\n".join(str(log) for log in logs)
-            logger.warning(f"[{dataset_name}] Push failed")
-            logger.warning(f"[{dataset_name}] Git logs: \n{logs}")
-            return False
-        else:
-            return True
-
-
-if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    token = os.environ["HF_TOKEN"]
-    organization = os.environ["ORGANIZATION"]
-    datasets_lib_path = Path(os.environ["DATASETS_LIB_PATH"]).expanduser().resolve()
-
-    if Path(token).expanduser().is_file():
-        with Path(token).expanduser().open() as f:
-            token = f.read().strip()
-    user_info = whoami(token)
-    check_authorizations(user_info, organization)
-
-    datasets_lib_repo = Repo(datasets_lib_path)
-    current_commit, prev_commit = list(islice(datasets_lib_repo.iter_commits(), 2))
-    author_name, author_email = current_commit.author.name, current_commit.author.email
-    author_name, author_email = hf_retrieve_author(author_name, author_email)
-    commit_args = (f"-m {current_commit.message}",)
-    commit_args += (
-        f"-m Commit from {DATASETS_LIB_COMMIT_URL.format(organization=organization, hexsha=current_commit.hexsha)}",
-    )
-    commit_args += (f"--author={author_name} <{author_email}>",)
-
-    for _tag in datasets_lib_repo.tags:
-        # Add a new tag if this is a `datasets` release
-        if _tag.commit == current_commit and re.match(r"^v[0-9]+\.[0-9]+\.[0-9]+$", _tag.name):
-            new_tag = _tag
-            break
-    else:
-        new_tag = None
-
-    changed_files_since_last_commit = [
-        path
-        for diff in datasets_lib_repo.index.diff(prev_commit)
-        for path in [diff.a_path, diff.b_path]
-        if path.startswith(DATASETS_LIB_CATALOG_DIR_NAME) and path.count("/") >= 2
-    ]
-
-    changed_datasets_names_since_last_commit = {
-        path.split("/")[1] for path in changed_files_since_last_commit
-    }
-    # ignore json, csv etc.
-    changed_datasets_names_since_last_commit = {
-        dataset_name
-        for dataset_name in changed_datasets_names_since_last_commit
-        if (
-            datasets_lib_path
-            / DATASETS_LIB_CATALOG_DIR_NAME
-            / dataset_name
-            / (dataset_name + ".py")
-        ).is_file()
-    }
-
-    deleted_files = {
-        dataset_name: set() for dataset_name in changed_datasets_names_since_last_commit
-    }
-    for path in changed_files_since_last_commit:
-        _, dataset_name, rel_path = path.split("/", 2)
-        if (
-            dataset_name in changed_datasets_names_since_last_commit
-            and not (datasets_lib_path / path).is_file()
-        ):
-            deleted_files[dataset_name].add(rel_path)
-
-    dataset_names = sys.argv[1:]
-    if dataset_names:
-        if dataset_names[0] == "--all":
-            dataset_names = sorted(
-                d.name
-                for d in (datasets_lib_path / DATASETS_LIB_CATALOG_DIR_NAME).glob("*")
-                if d.is_dir() and (d / (d.name + ".py")).is_file()  # ignore json, csv etc.
-            )
-        if dataset_names[0] == "--auto":
-            if new_tag:
-                logger.info(
-                    "All the datasets will be updated since --auto was used and "
-                    f"this is a new release {new_tag.name} of the `datasets` library."
-                )
-                dataset_names = sorted(
-                    d.name for d in (ROOT / HUB_DIR_NAME).glob("*") if d.is_dir()
-                )
-                dataset_names = sorted(
-                    d.name
-                    for d in (datasets_lib_path / DATASETS_LIB_CATALOG_DIR_NAME).glob("*")
-                    if d.is_dir() and (d / (d.name + ".py")).is_file()  # ignore json, csv etc.
-                )
-            else:
-                logger.info(
-                    "All the datasets that have been changed in the latest commit of `datasets` will be updated "
-                    "since --auto was used."
-                )
-                dataset_names = sorted(changed_datasets_names_since_last_commit)
-    if dataset_names:
-        logger.info(
-            f"Updating the '{CANONICAL_DATASET_REPO_MAIN_BRANCH}' branch of those datasets: {' '.join(dataset_names)}"
-        )
-        successes = thread_map(
-            update_main(
-                organization=organization,
-                datasets_lib_path=datasets_lib_path,
-                commit_args=commit_args,
-                token=token,
-                deleted_files=deleted_files,
-                tag_name=new_tag.name if new_tag else None,
-            ),
-            dataset_names,
-        )
-        datasets_with_errors = [
-            dataset_name
-            for success, dataset_name in zip(successes, dataset_names)
-            if not success
-        ]
-        if datasets_with_errors:
-            raise UpdateFailed(
-                f"Those datasets couldn't be updated: {' '.join(datasets_with_errors)}\n"
-                "Please check the logs to see what went wrong.\n"
-                "Once you fixed the errors, you can re-run this script:\n\n"
-                f"\tpython update_main.py {' '.join(datasets_with_errors)}"
-            )
-    else:
-        logger.info("No changes detected -- nothing to update !")
diff --git a/.github/workflows/update-hub-repositories.yaml b/.github/workflows/update-hub-repositories.yaml
deleted file mode 100644
index 837bcb40..00000000
--- a/.github/workflows/update-hub-repositories.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: Update Hub repositories
-
-on: workflow_dispatch
-
-jobs:
-  update-hub-repositories:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: "3.9"
-      - name: Set up default Git config
-        run: |
-          git config --global user.name system
-          git config --global user.email christophalt@posteo.de
-      - name: Install dependencies
-        working-directory: ./.github/hub
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements.txt
-      - name: Update Hub repositories
-        working-directory: ./.github/hub
-        run: |
-          export HF_TOKEN=${{ secrets.HUB_TRUSTED_COMMITER_TOKEN }}
-          export ORGANIZATION=pie
-          export DATASETS_LIB_PATH=$GITHUB_WORKSPACE
-          export HF_USE_PROD=1
-          export GIT_LFS_SKIP_SMUDGE=1
-          python update_hub_repositories.py --all
diff --git a/dataset_builders/conll2002/conll2002.py b/dataset_builders/conll2002/conll2002.py
deleted file mode 100644
index 74801001..00000000
--- a/dataset_builders/conll2002/conll2002.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from dataclasses import dataclass
-
-import datasets
-
-import pytorch_ie.data.builder
-from pytorch_ie.annotations import LabeledSpan
-from pytorch_ie.core import AnnotationList, annotation_field
-from pytorch_ie.documents import TextDocument
-from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans
-
-
-class Conll2002Config(datasets.BuilderConfig):
-    """BuilderConfig for CoNLL2002"""
-
-    def __init__(self, **kwargs):
-        """BuilderConfig for CoNLL2002.
-        Args:
-            **kwargs: keyword arguments forwarded to super.
-        """
-        super().__init__(**kwargs)
-
-
-@dataclass
-class CoNLL2002Document(TextDocument):
-    entities: AnnotationList[LabeledSpan] = annotation_field(target="text")
-
-
-class Conll2003(pytorch_ie.data.builder.GeneratorBasedBuilder):
-    DOCUMENT_TYPE = CoNLL2002Document
-
-    BASE_DATASET_PATH = "conll2002"
-
-    BUILDER_CONFIGS = [
-        Conll2002Config(
-            name="es", version=datasets.Version("1.0.0"), description="CoNLL2002 Spanish dataset"
-        ),
-        Conll2002Config(
-            name="nl", version=datasets.Version("1.0.0"), description="CoNLL2002 Dutch dataset"
-        ),
-    ]
-
-    def _generate_document_kwargs(self, dataset):
-        return {"int_to_str": dataset.features["ner_tags"].feature.int2str}
-
-    def _generate_document(self, example, int_to_str):
-        doc_id = example["id"]
-        tokens = example["tokens"]
-        ner_tags = [int_to_str(tag) for tag in example["ner_tags"]]
-
-        text, ner_spans = tokens_and_tags_to_text_and_labeled_spans(tokens=tokens, tags=ner_tags)
-
-        document = CoNLL2002Document(text=text, id=doc_id)
-
-        for span in sorted(ner_spans, key=lambda span: span.start):
-            document.entities.append(span)
-
-        return document
diff --git a/dataset_builders/conll2002/dummy/es/1.0.0/dummy_data.zip b/dataset_builders/conll2002/dummy/es/1.0.0/dummy_data.zip
deleted file mode 100644
index 2c39a0e0..00000000
Binary files a/dataset_builders/conll2002/dummy/es/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/conll2002/dummy/nl/1.0.0/dummy_data.zip b/dataset_builders/conll2002/dummy/nl/1.0.0/dummy_data.zip
deleted file mode 100644
index 95cd3d19..00000000
Binary files a/dataset_builders/conll2002/dummy/nl/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/conll2003/conll2003.py b/dataset_builders/conll2003/conll2003.py
deleted file mode 100644
index 61b0f70a..00000000
--- a/dataset_builders/conll2003/conll2003.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from dataclasses import dataclass
-
-import datasets
-
-import pytorch_ie.data.builder
-from pytorch_ie.annotations import LabeledSpan
-from pytorch_ie.core import AnnotationList, annotation_field
-from pytorch_ie.documents import TextDocument
-from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans
-
-
-class CoNLL2003Config(datasets.BuilderConfig):
-    """BuilderConfig for CoNLL2003"""
-
-    def __init__(self, **kwargs):
-        """BuilderConfig for CoNLL2003.
-        Args:
-            **kwargs: keyword arguments forwarded to super.
-        """
-        super().__init__(**kwargs)
-
-
-@dataclass
-class CoNLL2003Document(TextDocument):
-    entities: AnnotationList[LabeledSpan] = annotation_field(target="text")
-
-
-class Conll2003(pytorch_ie.data.builder.GeneratorBasedBuilder):
-    DOCUMENT_TYPE = CoNLL2003Document
-
-    BASE_DATASET_PATH = "conll2003"
-
-    BUILDER_CONFIGS = [
-        CoNLL2003Config(
-            name="conll2003", version=datasets.Version("1.0.0"), description="CoNLL2003 dataset"
-        ),
-    ]
-
-    def _generate_document_kwargs(self, dataset):
-        return {"int_to_str": dataset.features["ner_tags"].feature.int2str}
-
-    def _generate_document(self, example, int_to_str):
-        doc_id = example["id"]
-        tokens = example["tokens"]
-        ner_tags = [int_to_str(tag) for tag in example["ner_tags"]]
-
-        text, ner_spans = tokens_and_tags_to_text_and_labeled_spans(tokens=tokens, tags=ner_tags)
-
-        document = CoNLL2003Document(text=text, id=doc_id)
-
-        for span in sorted(ner_spans, key=lambda span: span.start):
-            document.entities.append(span)
-
-        return document
diff --git a/dataset_builders/conll2003/dummy/conll2003/1.0.0/dummy_data.zip b/dataset_builders/conll2003/dummy/conll2003/1.0.0/dummy_data.zip
deleted file mode 100644
index 70fbd32d..00000000
Binary files a/dataset_builders/conll2003/dummy/conll2003/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/conllpp/conllpp.py b/dataset_builders/conllpp/conllpp.py
deleted file mode 100644
index 18896a2a..00000000
--- a/dataset_builders/conllpp/conllpp.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from dataclasses import dataclass
-
-import datasets
-
-import pytorch_ie.data.builder
-from pytorch_ie.annotations import LabeledSpan
-from pytorch_ie.core import AnnotationList, annotation_field
-from pytorch_ie.documents import TextDocument
-from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans
-
-
-class CoNLLppConfig(datasets.BuilderConfig):
-    """BuilderConfig for CoNLLpp"""
-
-    def __init__(self, **kwargs):
-        """BuilderConfig for CoNLLpp.
-        Args:
-            **kwargs: keyword arguments forwarded to super.
-        """
-        super().__init__(**kwargs)
-
-
-@dataclass
-class CoNLLppDocument(TextDocument):
-    entities: AnnotationList[LabeledSpan] = annotation_field(target="text")
-
-
-class CoNLLpp(pytorch_ie.data.builder.GeneratorBasedBuilder):
-    DOCUMENT_TYPE = CoNLLppDocument
-
-    BASE_DATASET_PATH = "conllpp"
-
-    BUILDER_CONFIGS = [
-        CoNLLppConfig(
-            name="conllpp", version=datasets.Version("1.0.0"), description="CoNLLpp dataset"
-        ),
-    ]
-
-    def _generate_document_kwargs(self, dataset):
-        return {"int_to_str": dataset.features["ner_tags"].feature.int2str}
-
-    def _generate_document(self, example, int_to_str):
-        doc_id = example["id"]
-        tokens = example["tokens"]
-        ner_tags = [int_to_str(tag) for tag in example["ner_tags"]]
-
-        text, ner_spans = tokens_and_tags_to_text_and_labeled_spans(tokens=tokens, tags=ner_tags)
-
-        document = CoNLLppDocument(text=text, id=doc_id)
-
-        for span in sorted(ner_spans, key=lambda span: span.start):
-            document.entities.append(span)
-
-        return document
diff --git a/dataset_builders/conllpp/dummy/conllpp/1.0.0/dummy_data.zip b/dataset_builders/conllpp/dummy/conllpp/1.0.0/dummy_data.zip
deleted file mode 100644
index d1912415..00000000
Binary files a/dataset_builders/conllpp/dummy/conllpp/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/german_legal_entity_recognition/dummy/all/1.0.0/dummy_data.zip b/dataset_builders/german_legal_entity_recognition/dummy/all/1.0.0/dummy_data.zip
deleted file mode 100644
index f2b8e8f8..00000000
Binary files a/dataset_builders/german_legal_entity_recognition/dummy/all/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/german_legal_entity_recognition/dummy/bag/1.0.0/dummy_data.zip b/dataset_builders/german_legal_entity_recognition/dummy/bag/1.0.0/dummy_data.zip
deleted file mode 100644
index 9c537f4e..00000000
Binary files a/dataset_builders/german_legal_entity_recognition/dummy/bag/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/german_legal_entity_recognition/dummy/bfh/1.0.0/dummy_data.zip b/dataset_builders/german_legal_entity_recognition/dummy/bfh/1.0.0/dummy_data.zip
deleted file mode 100644
index 9c537f4e..00000000
Binary files a/dataset_builders/german_legal_entity_recognition/dummy/bfh/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/german_legal_entity_recognition/dummy/bgh/1.0.0/dummy_data.zip b/dataset_builders/german_legal_entity_recognition/dummy/bgh/1.0.0/dummy_data.zip
deleted file mode 100644
index 9c537f4e..00000000
Binary files a/dataset_builders/german_legal_entity_recognition/dummy/bgh/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/german_legal_entity_recognition/dummy/bpatg/1.0.0/dummy_data.zip b/dataset_builders/german_legal_entity_recognition/dummy/bpatg/1.0.0/dummy_data.zip
deleted file mode 100644
index 9c537f4e..00000000
Binary files a/dataset_builders/german_legal_entity_recognition/dummy/bpatg/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/german_legal_entity_recognition/dummy/bsg/1.0.0/dummy_data.zip b/dataset_builders/german_legal_entity_recognition/dummy/bsg/1.0.0/dummy_data.zip
deleted file mode 100644
index 9c537f4e..00000000
Binary files a/dataset_builders/german_legal_entity_recognition/dummy/bsg/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/german_legal_entity_recognition/dummy/bverfg/1.0.0/dummy_data.zip b/dataset_builders/german_legal_entity_recognition/dummy/bverfg/1.0.0/dummy_data.zip
deleted file mode 100644
index 9c537f4e..00000000
Binary files a/dataset_builders/german_legal_entity_recognition/dummy/bverfg/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/german_legal_entity_recognition/dummy/bverwg/1.0.0/dummy_data.zip b/dataset_builders/german_legal_entity_recognition/dummy/bverwg/1.0.0/dummy_data.zip
deleted file mode 100644
index f2b8e8f8..00000000
Binary files a/dataset_builders/german_legal_entity_recognition/dummy/bverwg/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/german_legal_entity_recognition/german_legal_entity_recognition.py b/dataset_builders/german_legal_entity_recognition/german_legal_entity_recognition.py
deleted file mode 100644
index ea0c5934..00000000
--- a/dataset_builders/german_legal_entity_recognition/german_legal_entity_recognition.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from dataclasses import dataclass
-
-import datasets
-
-import pytorch_ie.data.builder
-from pytorch_ie.annotations import LabeledSpan
-from pytorch_ie.core import AnnotationList, annotation_field
-from pytorch_ie.documents import TextDocument
-from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans
-
-_VERSION = "1.0.0"
-_COURTS = ["bag", "bfh", "bgh", "bpatg", "bsg", "bverfg", "bverwg"]
-_COURTS_FILEPATHS = {court: f"{court}.conll" for court in _COURTS}
-_ALL = "all"
-
-
-class GermanLegalEntityRecognitionConfig(datasets.BuilderConfig):
-    def __init__(self, *args, courts=None, **kwargs):
-        super().__init__(*args, version=datasets.Version(_VERSION, ""), **kwargs)
-        self.courts = courts
-
-    @property
-    def filepaths(self):
-        return [_COURTS_FILEPATHS[court] for court in self.courts]
-
-
-@dataclass
-class GermanLegalEntityRecognitionDocument(TextDocument):
-    entities: AnnotationList[LabeledSpan] = annotation_field(target="text")
-
-
-class GermanLegalEntityRecognition(pytorch_ie.data.builder.GeneratorBasedBuilder):
-    DOCUMENT_TYPE = GermanLegalEntityRecognitionDocument
-
-    BASE_DATASET_PATH = "german_legal_entity_recognition"
-
-    BUILDER_CONFIGS = [
-        GermanLegalEntityRecognitionConfig(
-            name=court, courts=[court], description=f"Court. {court}."
-        )
-        for court in _COURTS
-    ] + [
-        GermanLegalEntityRecognitionConfig(
-            name=_ALL, courts=_COURTS, description="All courts included."
-        )
-    ]
-    BUILDER_CONFIG_CLASS = GermanLegalEntityRecognitionConfig
-    DEFAULT_CONFIG_NAME = _ALL  # type: ignore
-
-    def _generate_document_kwargs(self, dataset):
-        return {"int_to_str": dataset.features["ner_tags"].feature.int2str}
-
-    def _generate_document(self, example, int_to_str):
-        doc_id = example["id"]
-        tokens = example["tokens"]
-        ner_tags = [int_to_str(tag) for tag in example["ner_tags"]]
-
-        text, ner_spans = tokens_and_tags_to_text_and_labeled_spans(tokens=tokens, tags=ner_tags)
-
-        document = GermanLegalEntityRecognitionDocument(text=text, id=doc_id)
-
-        for span in sorted(ner_spans, key=lambda span: span.start):
-            document.entities.append(span)
-
-        return document
diff --git a/dataset_builders/germaner/dummy/germaner/0.9.1/dummy_data.zip b/dataset_builders/germaner/dummy/germaner/0.9.1/dummy_data.zip
deleted file mode 100644
index c64c9c18..00000000
Binary files a/dataset_builders/germaner/dummy/germaner/0.9.1/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/germaner/germaner.py b/dataset_builders/germaner/germaner.py
deleted file mode 100644
index feb63de8..00000000
--- a/dataset_builders/germaner/germaner.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from dataclasses import dataclass
-
-import datasets
-
-import pytorch_ie.data.builder
-from pytorch_ie.annotations import LabeledSpan
-from pytorch_ie.core import AnnotationList, annotation_field
-from pytorch_ie.documents import TextDocument
-from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans
-
-
-class GermaNERConfig(datasets.BuilderConfig):
-    """BuilderConfig for GermaNER."""
-
-    def __init__(self, **kwargs):
-        """BuilderConfig for GermaNER.
-
-        Args:
-            **kwargs: keyword arguments forwarded to super.
-        """
-        super().__init__(**kwargs)
-
-
-@dataclass
-class GermaNERDocument(TextDocument):
-    entities: AnnotationList[LabeledSpan] = annotation_field(target="text")
-
-
-class GermaNER(pytorch_ie.data.builder.GeneratorBasedBuilder):
-    DOCUMENT_TYPE = GermaNERDocument
-
-    BASE_DATASET_PATH = "germaner"
-
-    BUILDER_CONFIGS = [
-        GermaNERConfig(
-            name="germaner",
-            version=datasets.Version("0.9.1"),
-            description="GermaNER dataset",
-        ),
-    ]
-
-    def _generate_document_kwargs(self, dataset):
-        return {"int_to_str": dataset.features["ner_tags"].feature.int2str}
-
-    def _generate_document(self, example, int_to_str):
-        doc_id = example["id"]
-        tokens = example["tokens"]
-        ner_tags = [int_to_str(tag) for tag in example["ner_tags"]]
-
-        text, ner_spans = tokens_and_tags_to_text_and_labeled_spans(tokens=tokens, tags=ner_tags)
-
-        document = GermaNERDocument(text=text, id=doc_id)
-
-        for span in sorted(ner_spans, key=lambda span: span.start):
-            document.entities.append(span)
-
-        return document
diff --git a/dataset_builders/germeval_14/dummy/germeval_14/2.0.0/dummy_data.zip b/dataset_builders/germeval_14/dummy/germeval_14/2.0.0/dummy_data.zip
deleted file mode 100644
index be792dfe..00000000
Binary files a/dataset_builders/germeval_14/dummy/germeval_14/2.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/germeval_14/germeval_14.py b/dataset_builders/germeval_14/germeval_14.py
deleted file mode 100644
index 668258e1..00000000
--- a/dataset_builders/germeval_14/germeval_14.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from dataclasses import dataclass
-
-import datasets
-
-import pytorch_ie.data.builder
-from pytorch_ie.annotations import LabeledSpan
-from pytorch_ie.core import AnnotationList, annotation_field
-from pytorch_ie.documents import TextDocument
-from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans
-
-
-class GermEval14Config(datasets.BuilderConfig):
-    """BuilderConfig for GermEval 2014."""
-
-    def __init__(self, **kwargs):
-        """BuilderConfig for GermEval 2014.
-        Args:
-            **kwargs: keyword arguments forwarded to super.
-        """
-        super().__init__(**kwargs)
-
-
-@dataclass
-class GermEval14Document(TextDocument):
-    entities: AnnotationList[LabeledSpan] = annotation_field(target="text")
-
-
-class GermEval14(pytorch_ie.data.builder.GeneratorBasedBuilder):
-    DOCUMENT_TYPE = GermEval14Document
-
-    BASE_DATASET_PATH = "germeval_14"
-
-    BUILDER_CONFIGS = [
-        GermEval14Config(
-            name="germeval_14",
-            version=datasets.Version("2.0.0"),
-            description="GermEval 2014 NER Shared Task dataset",
-        ),
-    ]
-
-    def _generate_document_kwargs(self, dataset):
-        return {"int_to_str": dataset.features["ner_tags"].feature.int2str}
-
-    def _generate_document(self, example, int_to_str):
-        doc_id = example["id"]
-        tokens = example["tokens"]
-        ner_tags = [int_to_str(tag) for tag in example["ner_tags"]]
-        nested_ner_tags = [int_to_str(tag) for tag in example["nested_ner_tags"]]
-
-        text, ner_spans = tokens_and_tags_to_text_and_labeled_spans(tokens=tokens, tags=ner_tags)
-        _, nested_ner_tags = tokens_and_tags_to_text_and_labeled_spans(
-            tokens=tokens, tags=nested_ner_tags
-        )
-
-        document = GermEval14Document(text=text, id=doc_id)
-
-        for span in sorted(ner_spans + nested_ner_tags, key=lambda span: span.start):
-            document.entities.append(span)
-
-        return document
diff --git a/dataset_builders/ncbi_disease/dummy/ncbi_disease/1.0.0/dummy_data.zip b/dataset_builders/ncbi_disease/dummy/ncbi_disease/1.0.0/dummy_data.zip
deleted file mode 100644
index 607b3c05..00000000
Binary files a/dataset_builders/ncbi_disease/dummy/ncbi_disease/1.0.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/ncbi_disease/ncbi_disease.py b/dataset_builders/ncbi_disease/ncbi_disease.py
deleted file mode 100644
index 1e8d69ad..00000000
--- a/dataset_builders/ncbi_disease/ncbi_disease.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from dataclasses import dataclass
-
-import datasets
-
-import pytorch_ie.data.builder
-from pytorch_ie.annotations import LabeledSpan
-from pytorch_ie.core import AnnotationList, annotation_field
-from pytorch_ie.documents import TextDocument
-from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans
-
-
-class NCBIDiseaseConfig(datasets.BuilderConfig):
-    """BuilderConfig for NCBIDisease"""
-
-    def __init__(self, **kwargs):
-        """BuilderConfig for NCBIDisease.
-        Args:
-            **kwargs: keyword arguments forwarded to super.
-        """
-        super().__init__(**kwargs)
-
-
-@dataclass
-class NCBIDiseaseDocument(TextDocument):
-    entities: AnnotationList[LabeledSpan] = annotation_field(target="text")
-
-
-class NCBIDisease(pytorch_ie.data.builder.GeneratorBasedBuilder):
-    DOCUMENT_TYPE = NCBIDiseaseDocument
-
-    BASE_DATASET_PATH = "ncbi_disease"
-
-    BUILDER_CONFIGS = [
-        NCBIDiseaseConfig(
-            name="ncbi_disease",
-            version=datasets.Version("1.0.0"),
-            description="NCBIDisease dataset",
-        ),
-    ]
-
-    def _generate_document_kwargs(self, dataset):
-        return {"int_to_str": dataset.features["ner_tags"].feature.int2str}
-
-    def _generate_document(self, example, int_to_str):
-        doc_id = example["id"]
-        tokens = example["tokens"]
-        ner_tags = [int_to_str(tag) for tag in example["ner_tags"]]
-
-        text, ner_spans = tokens_and_tags_to_text_and_labeled_spans(tokens=tokens, tags=ner_tags)
-
-        document = NCBIDiseaseDocument(text=text, id=doc_id)
-
-        for span in sorted(ner_spans, key=lambda span: span.start):
-            document.entities.append(span)
-
-        return document
diff --git a/dataset_builders/wikiann/dummy/ace/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ace/1.1.0/dummy_data.zip
deleted file mode 100644
index cd76ff3a..00000000
Binary files a/dataset_builders/wikiann/dummy/ace/1.1.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/wikiann/dummy/af/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/af/1.1.0/dummy_data.zip
deleted file mode 100644
index fed09c66..00000000
Binary files a/dataset_builders/wikiann/dummy/af/1.1.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/wikiann/dummy/als/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/als/1.1.0/dummy_data.zip
deleted file mode 100644
index a589df7e..00000000
Binary files a/dataset_builders/wikiann/dummy/als/1.1.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/wikiann/dummy/am/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/am/1.1.0/dummy_data.zip
deleted file mode 100644
index 743b3821..00000000
Binary files a/dataset_builders/wikiann/dummy/am/1.1.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/wikiann/dummy/an/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/an/1.1.0/dummy_data.zip
deleted file mode 100644
index dbfd0e26..00000000
Binary files a/dataset_builders/wikiann/dummy/an/1.1.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/wikiann/dummy/ang/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ang/1.1.0/dummy_data.zip
deleted file mode 100644
index 511ffbd0..00000000
Binary files a/dataset_builders/wikiann/dummy/ang/1.1.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/wikiann/dummy/ar/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ar/1.1.0/dummy_data.zip
deleted file mode 100644
index 6c4fc1a9..00000000
Binary files a/dataset_builders/wikiann/dummy/ar/1.1.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/wikiann/dummy/arc/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/arc/1.1.0/dummy_data.zip
deleted file mode 100644
index 05b7791e..00000000
Binary files a/dataset_builders/wikiann/dummy/arc/1.1.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/wikiann/dummy/arz/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/arz/1.1.0/dummy_data.zip
deleted file mode 100644
index e9ec78b1..00000000
Binary files a/dataset_builders/wikiann/dummy/arz/1.1.0/dummy_data.zip and /dev/null differ
diff --git a/dataset_builders/wikiann/dummy/as/1.1.0/dummy_data.zip
b/dataset_builders/wikiann/dummy/as/1.1.0/dummy_data.zip deleted file mode 100644 index ad9c8c55..00000000 Binary files a/dataset_builders/wikiann/dummy/as/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ast/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ast/1.1.0/dummy_data.zip deleted file mode 100644 index 77c1da3c..00000000 Binary files a/dataset_builders/wikiann/dummy/ast/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ay/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ay/1.1.0/dummy_data.zip deleted file mode 100644 index ed5e4a33..00000000 Binary files a/dataset_builders/wikiann/dummy/ay/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/az/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/az/1.1.0/dummy_data.zip deleted file mode 100644 index 3dac078c..00000000 Binary files a/dataset_builders/wikiann/dummy/az/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ba/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ba/1.1.0/dummy_data.zip deleted file mode 100644 index f5c3203f..00000000 Binary files a/dataset_builders/wikiann/dummy/ba/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/bar/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/bar/1.1.0/dummy_data.zip deleted file mode 100644 index 22dffbe3..00000000 Binary files a/dataset_builders/wikiann/dummy/bar/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/bat-smg/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/bat-smg/1.1.0/dummy_data.zip deleted file mode 100644 index 346b7fc5..00000000 Binary files a/dataset_builders/wikiann/dummy/bat-smg/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/be-x-old/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/be-x-old/1.1.0/dummy_data.zip deleted file mode 100644 index ae4dd310..00000000 Binary files a/dataset_builders/wikiann/dummy/be-x-old/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/be/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/be/1.1.0/dummy_data.zip deleted file mode 100644 index 7ab5e9ab..00000000 Binary files a/dataset_builders/wikiann/dummy/be/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/bg/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/bg/1.1.0/dummy_data.zip deleted file mode 100644 index bb5bfc45..00000000 Binary files a/dataset_builders/wikiann/dummy/bg/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/bh/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/bh/1.1.0/dummy_data.zip deleted file mode 100644 index 7c16d40f..00000000 Binary files a/dataset_builders/wikiann/dummy/bh/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/bn/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/bn/1.1.0/dummy_data.zip deleted file mode 100644 index 471c8d59..00000000 Binary files a/dataset_builders/wikiann/dummy/bn/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/bo/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/bo/1.1.0/dummy_data.zip deleted file mode 100644 index aa14af13..00000000 Binary files a/dataset_builders/wikiann/dummy/bo/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/br/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/br/1.1.0/dummy_data.zip 
deleted file mode 100644 index 985f69f8..00000000 Binary files a/dataset_builders/wikiann/dummy/br/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/bs/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/bs/1.1.0/dummy_data.zip deleted file mode 100644 index f4d0d8ff..00000000 Binary files a/dataset_builders/wikiann/dummy/bs/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ca/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ca/1.1.0/dummy_data.zip deleted file mode 100644 index 509e33d5..00000000 Binary files a/dataset_builders/wikiann/dummy/ca/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/cbk-zam/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/cbk-zam/1.1.0/dummy_data.zip deleted file mode 100644 index 5fe4815f..00000000 Binary files a/dataset_builders/wikiann/dummy/cbk-zam/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/cdo/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/cdo/1.1.0/dummy_data.zip deleted file mode 100644 index 3370fb11..00000000 Binary files a/dataset_builders/wikiann/dummy/cdo/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ce/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ce/1.1.0/dummy_data.zip deleted file mode 100644 index 51454d5f..00000000 Binary files a/dataset_builders/wikiann/dummy/ce/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ceb/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ceb/1.1.0/dummy_data.zip deleted file mode 100644 index 16afaee6..00000000 Binary files a/dataset_builders/wikiann/dummy/ceb/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ckb/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ckb/1.1.0/dummy_data.zip deleted file mode 100644 index 84e9afe6..00000000 Binary files a/dataset_builders/wikiann/dummy/ckb/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/co/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/co/1.1.0/dummy_data.zip deleted file mode 100644 index 2aedc3f4..00000000 Binary files a/dataset_builders/wikiann/dummy/co/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/crh/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/crh/1.1.0/dummy_data.zip deleted file mode 100644 index 34726e39..00000000 Binary files a/dataset_builders/wikiann/dummy/crh/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/cs/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/cs/1.1.0/dummy_data.zip deleted file mode 100644 index 061ddd4d..00000000 Binary files a/dataset_builders/wikiann/dummy/cs/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/csb/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/csb/1.1.0/dummy_data.zip deleted file mode 100644 index ec68cbaa..00000000 Binary files a/dataset_builders/wikiann/dummy/csb/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/cv/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/cv/1.1.0/dummy_data.zip deleted file mode 100644 index 3dfb5852..00000000 Binary files a/dataset_builders/wikiann/dummy/cv/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/cy/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/cy/1.1.0/dummy_data.zip deleted file mode 100644 index 11fa5f83..00000000 Binary files 
a/dataset_builders/wikiann/dummy/cy/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/da/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/da/1.1.0/dummy_data.zip deleted file mode 100644 index d86a6e6c..00000000 Binary files a/dataset_builders/wikiann/dummy/da/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/de/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/de/1.1.0/dummy_data.zip deleted file mode 100644 index 79d28936..00000000 Binary files a/dataset_builders/wikiann/dummy/de/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/diq/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/diq/1.1.0/dummy_data.zip deleted file mode 100644 index d4d8ca9c..00000000 Binary files a/dataset_builders/wikiann/dummy/diq/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/dv/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/dv/1.1.0/dummy_data.zip deleted file mode 100644 index 35b1dcff..00000000 Binary files a/dataset_builders/wikiann/dummy/dv/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/el/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/el/1.1.0/dummy_data.zip deleted file mode 100644 index 710b6247..00000000 Binary files a/dataset_builders/wikiann/dummy/el/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/eml/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/eml/1.1.0/dummy_data.zip deleted file mode 100644 index 3881eca8..00000000 Binary files a/dataset_builders/wikiann/dummy/eml/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/en/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/en/1.1.0/dummy_data.zip deleted file mode 100644 index 1ef8f10d..00000000 Binary files a/dataset_builders/wikiann/dummy/en/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/eo/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/eo/1.1.0/dummy_data.zip deleted file mode 100644 index ee80050f..00000000 Binary files a/dataset_builders/wikiann/dummy/eo/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/es/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/es/1.1.0/dummy_data.zip deleted file mode 100644 index 15cb4a6c..00000000 Binary files a/dataset_builders/wikiann/dummy/es/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/et/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/et/1.1.0/dummy_data.zip deleted file mode 100644 index c17ed76a..00000000 Binary files a/dataset_builders/wikiann/dummy/et/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/eu/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/eu/1.1.0/dummy_data.zip deleted file mode 100644 index 28bf2c89..00000000 Binary files a/dataset_builders/wikiann/dummy/eu/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ext/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ext/1.1.0/dummy_data.zip deleted file mode 100644 index 4ab21d70..00000000 Binary files a/dataset_builders/wikiann/dummy/ext/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/fa/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/fa/1.1.0/dummy_data.zip deleted file mode 100644 index 62c04b12..00000000 Binary files a/dataset_builders/wikiann/dummy/fa/1.1.0/dummy_data.zip and /dev/null differ diff 
--git a/dataset_builders/wikiann/dummy/fi/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/fi/1.1.0/dummy_data.zip deleted file mode 100644 index a4e52eef..00000000 Binary files a/dataset_builders/wikiann/dummy/fi/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/fiu-vro/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/fiu-vro/1.1.0/dummy_data.zip deleted file mode 100644 index db32f5c4..00000000 Binary files a/dataset_builders/wikiann/dummy/fiu-vro/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/fo/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/fo/1.1.0/dummy_data.zip deleted file mode 100644 index d054b898..00000000 Binary files a/dataset_builders/wikiann/dummy/fo/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/fr/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/fr/1.1.0/dummy_data.zip deleted file mode 100644 index 631f4804..00000000 Binary files a/dataset_builders/wikiann/dummy/fr/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/frr/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/frr/1.1.0/dummy_data.zip deleted file mode 100644 index 00561a33..00000000 Binary files a/dataset_builders/wikiann/dummy/frr/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/fur/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/fur/1.1.0/dummy_data.zip deleted file mode 100644 index 1f552a84..00000000 Binary files a/dataset_builders/wikiann/dummy/fur/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/fy/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/fy/1.1.0/dummy_data.zip deleted file mode 100644 index 710ced51..00000000 Binary files a/dataset_builders/wikiann/dummy/fy/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ga/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ga/1.1.0/dummy_data.zip deleted file mode 100644 index c463309a..00000000 Binary files a/dataset_builders/wikiann/dummy/ga/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/gan/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/gan/1.1.0/dummy_data.zip deleted file mode 100644 index c7fe4ad4..00000000 Binary files a/dataset_builders/wikiann/dummy/gan/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/gd/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/gd/1.1.0/dummy_data.zip deleted file mode 100644 index aea25be2..00000000 Binary files a/dataset_builders/wikiann/dummy/gd/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/gl/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/gl/1.1.0/dummy_data.zip deleted file mode 100644 index 3402c9e7..00000000 Binary files a/dataset_builders/wikiann/dummy/gl/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/gn/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/gn/1.1.0/dummy_data.zip deleted file mode 100644 index 6f9bd80b..00000000 Binary files a/dataset_builders/wikiann/dummy/gn/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/gu/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/gu/1.1.0/dummy_data.zip deleted file mode 100644 index 62e1a23b..00000000 Binary files a/dataset_builders/wikiann/dummy/gu/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/hak/1.1.0/dummy_data.zip 
b/dataset_builders/wikiann/dummy/hak/1.1.0/dummy_data.zip deleted file mode 100644 index 4ee4bdd1..00000000 Binary files a/dataset_builders/wikiann/dummy/hak/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/he/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/he/1.1.0/dummy_data.zip deleted file mode 100644 index 041799e0..00000000 Binary files a/dataset_builders/wikiann/dummy/he/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/hi/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/hi/1.1.0/dummy_data.zip deleted file mode 100644 index 3e023c26..00000000 Binary files a/dataset_builders/wikiann/dummy/hi/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/hr/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/hr/1.1.0/dummy_data.zip deleted file mode 100644 index 71ae2919..00000000 Binary files a/dataset_builders/wikiann/dummy/hr/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/hsb/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/hsb/1.1.0/dummy_data.zip deleted file mode 100644 index c74515aa..00000000 Binary files a/dataset_builders/wikiann/dummy/hsb/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/hu/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/hu/1.1.0/dummy_data.zip deleted file mode 100644 index 932f4f33..00000000 Binary files a/dataset_builders/wikiann/dummy/hu/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/hy/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/hy/1.1.0/dummy_data.zip deleted file mode 100644 index 5cd3c624..00000000 Binary files a/dataset_builders/wikiann/dummy/hy/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ia/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ia/1.1.0/dummy_data.zip deleted file mode 100644 index b50b8358..00000000 Binary files a/dataset_builders/wikiann/dummy/ia/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/id/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/id/1.1.0/dummy_data.zip deleted file mode 100644 index a25f95f6..00000000 Binary files a/dataset_builders/wikiann/dummy/id/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ig/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ig/1.1.0/dummy_data.zip deleted file mode 100644 index c956f5e5..00000000 Binary files a/dataset_builders/wikiann/dummy/ig/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ilo/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ilo/1.1.0/dummy_data.zip deleted file mode 100644 index 81d95d29..00000000 Binary files a/dataset_builders/wikiann/dummy/ilo/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/io/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/io/1.1.0/dummy_data.zip deleted file mode 100644 index 3009b02a..00000000 Binary files a/dataset_builders/wikiann/dummy/io/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/is/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/is/1.1.0/dummy_data.zip deleted file mode 100644 index 3f39b801..00000000 Binary files a/dataset_builders/wikiann/dummy/is/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/it/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/it/1.1.0/dummy_data.zip deleted file mode 100644 index 
62619baf..00000000 Binary files a/dataset_builders/wikiann/dummy/it/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ja/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ja/1.1.0/dummy_data.zip deleted file mode 100644 index 0d0ff262..00000000 Binary files a/dataset_builders/wikiann/dummy/ja/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/jbo/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/jbo/1.1.0/dummy_data.zip deleted file mode 100644 index 0037e08b..00000000 Binary files a/dataset_builders/wikiann/dummy/jbo/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/jv/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/jv/1.1.0/dummy_data.zip deleted file mode 100644 index 3f9033a4..00000000 Binary files a/dataset_builders/wikiann/dummy/jv/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ka/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ka/1.1.0/dummy_data.zip deleted file mode 100644 index 851a0bee..00000000 Binary files a/dataset_builders/wikiann/dummy/ka/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/kk/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/kk/1.1.0/dummy_data.zip deleted file mode 100644 index 9c79f210..00000000 Binary files a/dataset_builders/wikiann/dummy/kk/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/km/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/km/1.1.0/dummy_data.zip deleted file mode 100644 index 0c909439..00000000 Binary files a/dataset_builders/wikiann/dummy/km/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/kn/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/kn/1.1.0/dummy_data.zip deleted file mode 100644 index b592c128..00000000 Binary files a/dataset_builders/wikiann/dummy/kn/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ko/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ko/1.1.0/dummy_data.zip deleted file mode 100644 index cd3b6273..00000000 Binary files a/dataset_builders/wikiann/dummy/ko/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ksh/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ksh/1.1.0/dummy_data.zip deleted file mode 100644 index 501142d4..00000000 Binary files a/dataset_builders/wikiann/dummy/ksh/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ku/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ku/1.1.0/dummy_data.zip deleted file mode 100644 index 46e3ea98..00000000 Binary files a/dataset_builders/wikiann/dummy/ku/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ky/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ky/1.1.0/dummy_data.zip deleted file mode 100644 index e174b9c0..00000000 Binary files a/dataset_builders/wikiann/dummy/ky/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/la/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/la/1.1.0/dummy_data.zip deleted file mode 100644 index 9f77d111..00000000 Binary files a/dataset_builders/wikiann/dummy/la/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/lb/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/lb/1.1.0/dummy_data.zip deleted file mode 100644 index d8720d27..00000000 Binary files a/dataset_builders/wikiann/dummy/lb/1.1.0/dummy_data.zip 
and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/li/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/li/1.1.0/dummy_data.zip deleted file mode 100644 index e3e30a41..00000000 Binary files a/dataset_builders/wikiann/dummy/li/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/lij/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/lij/1.1.0/dummy_data.zip deleted file mode 100644 index c6d77b16..00000000 Binary files a/dataset_builders/wikiann/dummy/lij/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/lmo/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/lmo/1.1.0/dummy_data.zip deleted file mode 100644 index 0e010f5f..00000000 Binary files a/dataset_builders/wikiann/dummy/lmo/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ln/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ln/1.1.0/dummy_data.zip deleted file mode 100644 index c3970e33..00000000 Binary files a/dataset_builders/wikiann/dummy/ln/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/lt/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/lt/1.1.0/dummy_data.zip deleted file mode 100644 index f25350b4..00000000 Binary files a/dataset_builders/wikiann/dummy/lt/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/lv/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/lv/1.1.0/dummy_data.zip deleted file mode 100644 index 49a6535b..00000000 Binary files a/dataset_builders/wikiann/dummy/lv/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/map-bms/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/map-bms/1.1.0/dummy_data.zip deleted file mode 100644 index a15bd9e1..00000000 Binary files a/dataset_builders/wikiann/dummy/map-bms/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/mg/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/mg/1.1.0/dummy_data.zip deleted file mode 100644 index b809360c..00000000 Binary files a/dataset_builders/wikiann/dummy/mg/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/mhr/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/mhr/1.1.0/dummy_data.zip deleted file mode 100644 index ed9fd269..00000000 Binary files a/dataset_builders/wikiann/dummy/mhr/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/mi/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/mi/1.1.0/dummy_data.zip deleted file mode 100644 index 10d843b2..00000000 Binary files a/dataset_builders/wikiann/dummy/mi/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/min/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/min/1.1.0/dummy_data.zip deleted file mode 100644 index 9759ef37..00000000 Binary files a/dataset_builders/wikiann/dummy/min/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/mk/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/mk/1.1.0/dummy_data.zip deleted file mode 100644 index f42ce09c..00000000 Binary files a/dataset_builders/wikiann/dummy/mk/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ml/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ml/1.1.0/dummy_data.zip deleted file mode 100644 index 9caa31b9..00000000 Binary files a/dataset_builders/wikiann/dummy/ml/1.1.0/dummy_data.zip and /dev/null differ diff --git 
a/dataset_builders/wikiann/dummy/mn/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/mn/1.1.0/dummy_data.zip deleted file mode 100644 index 2b1b499b..00000000 Binary files a/dataset_builders/wikiann/dummy/mn/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/mr/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/mr/1.1.0/dummy_data.zip deleted file mode 100644 index 77c87fae..00000000 Binary files a/dataset_builders/wikiann/dummy/mr/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ms/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ms/1.1.0/dummy_data.zip deleted file mode 100644 index afa19ec6..00000000 Binary files a/dataset_builders/wikiann/dummy/ms/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/mt/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/mt/1.1.0/dummy_data.zip deleted file mode 100644 index c25f78b7..00000000 Binary files a/dataset_builders/wikiann/dummy/mt/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/mwl/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/mwl/1.1.0/dummy_data.zip deleted file mode 100644 index 672fa685..00000000 Binary files a/dataset_builders/wikiann/dummy/mwl/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/my/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/my/1.1.0/dummy_data.zip deleted file mode 100644 index baeb138b..00000000 Binary files a/dataset_builders/wikiann/dummy/my/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/mzn/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/mzn/1.1.0/dummy_data.zip deleted file mode 100644 index ec5dc245..00000000 Binary files a/dataset_builders/wikiann/dummy/mzn/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/nap/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/nap/1.1.0/dummy_data.zip deleted file mode 100644 index 2637fe81..00000000 Binary files a/dataset_builders/wikiann/dummy/nap/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/nds/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/nds/1.1.0/dummy_data.zip deleted file mode 100644 index 3ed690d1..00000000 Binary files a/dataset_builders/wikiann/dummy/nds/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ne/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ne/1.1.0/dummy_data.zip deleted file mode 100644 index b375920b..00000000 Binary files a/dataset_builders/wikiann/dummy/ne/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/nl/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/nl/1.1.0/dummy_data.zip deleted file mode 100644 index 414472d4..00000000 Binary files a/dataset_builders/wikiann/dummy/nl/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/nn/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/nn/1.1.0/dummy_data.zip deleted file mode 100644 index 06c597c8..00000000 Binary files a/dataset_builders/wikiann/dummy/nn/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/no/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/no/1.1.0/dummy_data.zip deleted file mode 100644 index 186c114f..00000000 Binary files a/dataset_builders/wikiann/dummy/no/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/nov/1.1.0/dummy_data.zip 
b/dataset_builders/wikiann/dummy/nov/1.1.0/dummy_data.zip deleted file mode 100644 index 080bcb1a..00000000 Binary files a/dataset_builders/wikiann/dummy/nov/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/oc/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/oc/1.1.0/dummy_data.zip deleted file mode 100644 index f4b89916..00000000 Binary files a/dataset_builders/wikiann/dummy/oc/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/or/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/or/1.1.0/dummy_data.zip deleted file mode 100644 index a8cb0106..00000000 Binary files a/dataset_builders/wikiann/dummy/or/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/os/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/os/1.1.0/dummy_data.zip deleted file mode 100644 index 41484f0f..00000000 Binary files a/dataset_builders/wikiann/dummy/os/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/pa/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/pa/1.1.0/dummy_data.zip deleted file mode 100644 index fd0d73bc..00000000 Binary files a/dataset_builders/wikiann/dummy/pa/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/pdc/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/pdc/1.1.0/dummy_data.zip deleted file mode 100644 index 05e5d60f..00000000 Binary files a/dataset_builders/wikiann/dummy/pdc/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/pl/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/pl/1.1.0/dummy_data.zip deleted file mode 100644 index 8a40d12c..00000000 Binary files a/dataset_builders/wikiann/dummy/pl/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/pms/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/pms/1.1.0/dummy_data.zip deleted file mode 100644 index 27739100..00000000 Binary files a/dataset_builders/wikiann/dummy/pms/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/pnb/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/pnb/1.1.0/dummy_data.zip deleted file mode 100644 index 800ef1d4..00000000 Binary files a/dataset_builders/wikiann/dummy/pnb/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ps/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ps/1.1.0/dummy_data.zip deleted file mode 100644 index 15ef561b..00000000 Binary files a/dataset_builders/wikiann/dummy/ps/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/pt/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/pt/1.1.0/dummy_data.zip deleted file mode 100644 index d15f23b6..00000000 Binary files a/dataset_builders/wikiann/dummy/pt/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/qu/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/qu/1.1.0/dummy_data.zip deleted file mode 100644 index eadb8b23..00000000 Binary files a/dataset_builders/wikiann/dummy/qu/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/rm/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/rm/1.1.0/dummy_data.zip deleted file mode 100644 index bde48753..00000000 Binary files a/dataset_builders/wikiann/dummy/rm/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ro/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ro/1.1.0/dummy_data.zip deleted file mode 100644 
index 4f3d5809..00000000 Binary files a/dataset_builders/wikiann/dummy/ro/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ru/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ru/1.1.0/dummy_data.zip deleted file mode 100644 index 1eef7feb..00000000 Binary files a/dataset_builders/wikiann/dummy/ru/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/rw/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/rw/1.1.0/dummy_data.zip deleted file mode 100644 index 6a617239..00000000 Binary files a/dataset_builders/wikiann/dummy/rw/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sa/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sa/1.1.0/dummy_data.zip deleted file mode 100644 index e018a2dd..00000000 Binary files a/dataset_builders/wikiann/dummy/sa/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sah/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sah/1.1.0/dummy_data.zip deleted file mode 100644 index a7085c9f..00000000 Binary files a/dataset_builders/wikiann/dummy/sah/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/scn/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/scn/1.1.0/dummy_data.zip deleted file mode 100644 index 59e403f2..00000000 Binary files a/dataset_builders/wikiann/dummy/scn/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sco/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sco/1.1.0/dummy_data.zip deleted file mode 100644 index 24ba099f..00000000 Binary files a/dataset_builders/wikiann/dummy/sco/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sd/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sd/1.1.0/dummy_data.zip deleted file mode 100644 index 5111cc51..00000000 Binary files a/dataset_builders/wikiann/dummy/sd/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sh/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sh/1.1.0/dummy_data.zip deleted file mode 100644 index 463dc506..00000000 Binary files a/dataset_builders/wikiann/dummy/sh/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/si/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/si/1.1.0/dummy_data.zip deleted file mode 100644 index 6a8c5fca..00000000 Binary files a/dataset_builders/wikiann/dummy/si/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/simple/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/simple/1.1.0/dummy_data.zip deleted file mode 100644 index 71db3ac0..00000000 Binary files a/dataset_builders/wikiann/dummy/simple/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sk/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sk/1.1.0/dummy_data.zip deleted file mode 100644 index 954768bd..00000000 Binary files a/dataset_builders/wikiann/dummy/sk/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sl/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sl/1.1.0/dummy_data.zip deleted file mode 100644 index dff090a7..00000000 Binary files a/dataset_builders/wikiann/dummy/sl/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/so/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/so/1.1.0/dummy_data.zip deleted file mode 100644 index 78728b5e..00000000 Binary files 
a/dataset_builders/wikiann/dummy/so/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sq/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sq/1.1.0/dummy_data.zip deleted file mode 100644 index 7bc2b6a6..00000000 Binary files a/dataset_builders/wikiann/dummy/sq/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sr/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sr/1.1.0/dummy_data.zip deleted file mode 100644 index d2f597e9..00000000 Binary files a/dataset_builders/wikiann/dummy/sr/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/su/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/su/1.1.0/dummy_data.zip deleted file mode 100644 index f3435530..00000000 Binary files a/dataset_builders/wikiann/dummy/su/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sv/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sv/1.1.0/dummy_data.zip deleted file mode 100644 index 509b21b6..00000000 Binary files a/dataset_builders/wikiann/dummy/sv/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/sw/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/sw/1.1.0/dummy_data.zip deleted file mode 100644 index 1660a8e2..00000000 Binary files a/dataset_builders/wikiann/dummy/sw/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/szl/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/szl/1.1.0/dummy_data.zip deleted file mode 100644 index fc4ee94a..00000000 Binary files a/dataset_builders/wikiann/dummy/szl/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ta/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ta/1.1.0/dummy_data.zip deleted file mode 100644 index 3d747855..00000000 Binary files a/dataset_builders/wikiann/dummy/ta/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/te/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/te/1.1.0/dummy_data.zip deleted file mode 100644 index feb941b8..00000000 Binary files a/dataset_builders/wikiann/dummy/te/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/tg/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/tg/1.1.0/dummy_data.zip deleted file mode 100644 index 3574d658..00000000 Binary files a/dataset_builders/wikiann/dummy/tg/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/th/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/th/1.1.0/dummy_data.zip deleted file mode 100644 index 5e2d7a46..00000000 Binary files a/dataset_builders/wikiann/dummy/th/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/tk/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/tk/1.1.0/dummy_data.zip deleted file mode 100644 index f3834f31..00000000 Binary files a/dataset_builders/wikiann/dummy/tk/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/tl/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/tl/1.1.0/dummy_data.zip deleted file mode 100644 index 3bd64d93..00000000 Binary files a/dataset_builders/wikiann/dummy/tl/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/tr/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/tr/1.1.0/dummy_data.zip deleted file mode 100644 index 1077282f..00000000 Binary files a/dataset_builders/wikiann/dummy/tr/1.1.0/dummy_data.zip and /dev/null differ diff --git 
a/dataset_builders/wikiann/dummy/tt/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/tt/1.1.0/dummy_data.zip deleted file mode 100644 index cc05927f..00000000 Binary files a/dataset_builders/wikiann/dummy/tt/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ug/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ug/1.1.0/dummy_data.zip deleted file mode 100644 index e6bcd0e7..00000000 Binary files a/dataset_builders/wikiann/dummy/ug/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/uk/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/uk/1.1.0/dummy_data.zip deleted file mode 100644 index 89f6fc72..00000000 Binary files a/dataset_builders/wikiann/dummy/uk/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/ur/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/ur/1.1.0/dummy_data.zip deleted file mode 100644 index 735758fa..00000000 Binary files a/dataset_builders/wikiann/dummy/ur/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/uz/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/uz/1.1.0/dummy_data.zip deleted file mode 100644 index 0a115173..00000000 Binary files a/dataset_builders/wikiann/dummy/uz/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/vec/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/vec/1.1.0/dummy_data.zip deleted file mode 100644 index 8aa14b6a..00000000 Binary files a/dataset_builders/wikiann/dummy/vec/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/vep/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/vep/1.1.0/dummy_data.zip deleted file mode 100644 index 4e348299..00000000 Binary files a/dataset_builders/wikiann/dummy/vep/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/vi/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/vi/1.1.0/dummy_data.zip deleted file mode 100644 index db79b259..00000000 Binary files a/dataset_builders/wikiann/dummy/vi/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/vls/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/vls/1.1.0/dummy_data.zip deleted file mode 100644 index f0268212..00000000 Binary files a/dataset_builders/wikiann/dummy/vls/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/vo/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/vo/1.1.0/dummy_data.zip deleted file mode 100644 index ead1e0ff..00000000 Binary files a/dataset_builders/wikiann/dummy/vo/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/wa/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/wa/1.1.0/dummy_data.zip deleted file mode 100644 index eb7ee975..00000000 Binary files a/dataset_builders/wikiann/dummy/wa/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/war/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/war/1.1.0/dummy_data.zip deleted file mode 100644 index 679d5fd8..00000000 Binary files a/dataset_builders/wikiann/dummy/war/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/wuu/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/wuu/1.1.0/dummy_data.zip deleted file mode 100644 index e156a519..00000000 Binary files a/dataset_builders/wikiann/dummy/wuu/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/xmf/1.1.0/dummy_data.zip 
b/dataset_builders/wikiann/dummy/xmf/1.1.0/dummy_data.zip deleted file mode 100644 index b655a658..00000000 Binary files a/dataset_builders/wikiann/dummy/xmf/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/yi/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/yi/1.1.0/dummy_data.zip deleted file mode 100644 index dcd7ad13..00000000 Binary files a/dataset_builders/wikiann/dummy/yi/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/yo/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/yo/1.1.0/dummy_data.zip deleted file mode 100644 index a5a36aa3..00000000 Binary files a/dataset_builders/wikiann/dummy/yo/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/zea/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/zea/1.1.0/dummy_data.zip deleted file mode 100644 index f461faf7..00000000 Binary files a/dataset_builders/wikiann/dummy/zea/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/zh-classical/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/zh-classical/1.1.0/dummy_data.zip deleted file mode 100644 index 8e132a5b..00000000 Binary files a/dataset_builders/wikiann/dummy/zh-classical/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/zh-min-nan/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/zh-min-nan/1.1.0/dummy_data.zip deleted file mode 100644 index d330ca83..00000000 Binary files a/dataset_builders/wikiann/dummy/zh-min-nan/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/zh-yue/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/zh-yue/1.1.0/dummy_data.zip deleted file mode 100644 index d7be6d57..00000000 Binary files a/dataset_builders/wikiann/dummy/zh-yue/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/dummy/zh/1.1.0/dummy_data.zip b/dataset_builders/wikiann/dummy/zh/1.1.0/dummy_data.zip deleted file mode 100644 index f748c15f..00000000 Binary files a/dataset_builders/wikiann/dummy/zh/1.1.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wikiann/wikiann.py b/dataset_builders/wikiann/wikiann.py deleted file mode 100644 index 634d9799..00000000 --- a/dataset_builders/wikiann/wikiann.py +++ /dev/null @@ -1,226 +0,0 @@ -from dataclasses import dataclass - -import datasets - -import pytorch_ie.data.builder -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextDocument -from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans - -_VERSION = "1.1.0" -_LANGS = [ - "ace", - "af", - "als", - "am", - "an", - "ang", - "ar", - "arc", - "arz", - "as", - "ast", - "ay", - "az", - "ba", - "bar", - "bat-smg", - "be", - "be-x-old", - "bg", - "bh", - "bn", - "bo", - "br", - "bs", - "ca", - "cbk-zam", - "cdo", - "ce", - "ceb", - "ckb", - "co", - "crh", - "cs", - "csb", - "cv", - "cy", - "da", - "de", - "diq", - "dv", - "el", - "eml", - "en", - "eo", - "es", - "et", - "eu", - "ext", - "fa", - "fi", - "fiu-vro", - "fo", - "fr", - "frr", - "fur", - "fy", - "ga", - "gan", - "gd", - "gl", - "gn", - "gu", - "hak", - "he", - "hi", - "hr", - "hsb", - "hu", - "hy", - "ia", - "id", - "ig", - "ilo", - "io", - "is", - "it", - "ja", - "jbo", - "jv", - "ka", - "kk", - "km", - "kn", - "ko", - "ksh", - "ku", - "ky", - "la", - "lb", - "li", - "lij", - "lmo", - "ln", - "lt", - "lv", - "map-bms", - "mg", - "mhr", - "mi", - "min", - 
"mk", - "ml", - "mn", - "mr", - "ms", - "mt", - "mwl", - "my", - "mzn", - "nap", - "nds", - "ne", - "nl", - "nn", - "no", - "nov", - "oc", - "or", - "os", - "pa", - "pdc", - "pl", - "pms", - "pnb", - "ps", - "pt", - "qu", - "rm", - "ro", - "ru", - "rw", - "sa", - "sah", - "scn", - "sco", - "sd", - "sh", - "si", - "simple", - "sk", - "sl", - "so", - "sq", - "sr", - "su", - "sv", - "sw", - "szl", - "ta", - "te", - "tg", - "th", - "tk", - "tl", - "tr", - "tt", - "ug", - "uk", - "ur", - "uz", - "vec", - "vep", - "vi", - "vls", - "vo", - "wa", - "war", - "wuu", - "xmf", - "yi", - "yo", - "zea", - "zh", - "zh-classical", - "zh-min-nan", - "zh-yue", -] - - -class WikiANNConfig(datasets.BuilderConfig): - def __init__(self, **kwargs): - super().__init__(version=datasets.Version(_VERSION, ""), **kwargs) - - -@dataclass -class WikiANNDocument(TextDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -class WikiANN(pytorch_ie.data.builder.GeneratorBasedBuilder): - DOCUMENT_TYPE = WikiANNDocument - - BASE_DATASET_PATH = "wikiann" - - BUILDER_CONFIGS = [ - WikiANNConfig(name=lang, description=f"WikiANN NER examples in language {lang}") - for lang in _LANGS - ] - - def _generate_document_kwargs(self, dataset): - return {"int_to_str": dataset.features["ner_tags"].feature.int2str} - - def _generate_document(self, example, int_to_str): - tokens = example["tokens"] - ner_tags = [int_to_str(tag) for tag in example["ner_tags"]] - - text, ner_spans = tokens_and_tags_to_text_and_labeled_spans(tokens=tokens, tags=ner_tags) - - document = WikiANNDocument(text=text, id=None) - - for span in sorted(ner_spans, key=lambda span: span.start): - document.entities.append(span) - - return document diff --git a/dataset_builders/wnut_17/dummy/wnut_17/1.0.0/dummy_data.zip b/dataset_builders/wnut_17/dummy/wnut_17/1.0.0/dummy_data.zip deleted file mode 100644 index 5d185e64..00000000 Binary files a/dataset_builders/wnut_17/dummy/wnut_17/1.0.0/dummy_data.zip and /dev/null differ diff --git a/dataset_builders/wnut_17/wnut_17.py b/dataset_builders/wnut_17/wnut_17.py deleted file mode 100644 index 82162e36..00000000 --- a/dataset_builders/wnut_17/wnut_17.py +++ /dev/null @@ -1,58 +0,0 @@ -from dataclasses import dataclass - -import datasets - -import pytorch_ie.data.builder -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextDocument -from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans - - -class WNUT_17Config(datasets.BuilderConfig): - """The WNUT 17 Emerging Entities Dataset.""" - - def __init__(self, **kwargs): - """BuilderConfig for WNUT 17. - Args: - **kwargs: keyword arguments forwarded to super. 
- """ - super().__init__(**kwargs) - - -@dataclass -class WNUT17Document(TextDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -class WNUT17(pytorch_ie.data.builder.GeneratorBasedBuilder): - """The WNUT 17 Emerging Entities Dataset.""" - - DOCUMENT_TYPE = WNUT17Document - - BASE_DATASET_PATH = "wnut_17" - - BUILDER_CONFIGS = [ - WNUT_17Config( - name="wnut_17", - version=datasets.Version("1.0.0"), - description="The WNUT 17 Emerging Entities Dataset", - ), - ] - - def _generate_document_kwargs(self, dataset): - return {"int_to_str": dataset.features["ner_tags"].feature.int2str} - - def _generate_document(self, example, int_to_str): - doc_id = example["id"] - tokens = example["tokens"] - ner_tags = [int_to_str(tag) for tag in example["ner_tags"]] - - text, ner_spans = tokens_and_tags_to_text_and_labeled_spans(tokens=tokens, tags=ner_tags) - - document = WNUT17Document(text=text, id=doc_id) - - for span in sorted(ner_spans, key=lambda span: span.start): - document.entities.append(span) - - return document diff --git a/noxfile.py b/noxfile.py index 80f480f0..561a89de 100644 --- a/noxfile.py +++ b/noxfile.py @@ -143,7 +143,7 @@ def safety(session: Session) -> None: @session(python=python_versions) def mypy(session: Session) -> None: """Type-check using mypy.""" - args = session.posargs or ["src", "tests", "docs/conf.py", "dataset_builders"] + args = session.posargs or ["src", "tests", "docs/conf.py"] session.install(".") session.install("mypy", "pytest") session.run("mypy", *args) @@ -177,48 +177,6 @@ def tests_not_slow(session: Session) -> None: session.notify("coverage", posargs=[]) -@session(python=python_versions) -def tests_no_local_datasets(session: Session) -> None: - """Run the test suite.""" - session.install(".") - session.install("coverage[toml]", "pytest", "pygments", "sh") - try: - session.run( - "coverage", - "run", - "--parallel", - "-m", - "pytest", - "-k", - "not LocalDatasetTest", - *session.posargs, - ) - finally: - if session.interactive: - session.notify("coverage", posargs=[]) - - -@session(python=python_versions) -def tests_only_local_datasets(session: Session) -> None: - """Run the test suite.""" - session.install(".") - session.install("coverage[toml]", "pytest", "pygments", "sh") - try: - session.run( - "coverage", - "run", - "--parallel", - "-m", - "pytest", - "-k", - "LocalDatasetTest", - *session.posargs, - ) - finally: - if session.interactive: - session.notify("coverage", posargs=[]) - - @session(python=python_versions[0]) def coverage(session: Session) -> None: """Produce the coverage report.""" diff --git a/poetry.lock b/poetry.lock index ca8f5662..7cc16625 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,18 +1,111 @@ +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. + [[package]] name = "absl-py" version = "1.4.0" description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py." 
-category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "absl-py-1.4.0.tar.gz", hash = "sha256:d2c244d01048ba476e7c080bd2c6df5e141d211de80223460d5b3b8a2a58433d"}, + {file = "absl_py-1.4.0-py3-none-any.whl", hash = "sha256:0d3fe606adfa4f7db64792dd4c7aee4ee0c38ab75dfd353b7a83ed3e957fcb47"}, +] [[package]] name = "aiohttp" version = "3.8.5" description = "Async http client/server framework (asyncio)" -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a94159871304770da4dd371f4291b20cac04e8c94f11bdea1c3478e557fbe0d8"}, + {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13bf85afc99ce6f9ee3567b04501f18f9f8dbbb2ea11ed1a2e079670403a7c84"}, + {file = "aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ce2ac5708501afc4847221a521f7e4b245abf5178cf5ddae9d5b3856ddb2f3a"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96943e5dcc37a6529d18766597c491798b7eb7a61d48878611298afc1fca946c"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ad5c3c4590bb3cc28b4382f031f3783f25ec223557124c68754a2231d989e2b"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c413c633d0512df4dc7fd2373ec06cc6a815b7b6d6c2f208ada7e9e93a5061d"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df72ac063b97837a80d80dec8d54c241af059cc9bb42c4de68bd5b61ceb37caa"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c48c5c0271149cfe467c0ff8eb941279fd6e3f65c9a388c984e0e6cf57538e14"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:368a42363c4d70ab52c2c6420a57f190ed3dfaca6a1b19afda8165ee16416a82"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7607ec3ce4993464368505888af5beb446845a014bc676d349efec0e05085905"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0d21c684808288a98914e5aaf2a7c6a3179d4df11d249799c32d1808e79503b5"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:312fcfbacc7880a8da0ae8b6abc6cc7d752e9caa0051a53d217a650b25e9a691"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad093e823df03bb3fd37e7dec9d4670c34f9e24aeace76808fc20a507cace825"}, + {file = "aiohttp-3.8.5-cp310-cp310-win32.whl", hash = "sha256:33279701c04351a2914e1100b62b2a7fdb9a25995c4a104259f9a5ead7ed4802"}, + {file = "aiohttp-3.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:6e4a280e4b975a2e7745573e3fc9c9ba0d1194a3738ce1cbaa80626cc9b4f4df"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae871a964e1987a943d83d6709d20ec6103ca1eaf52f7e0d36ee1b5bebb8b9b9"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:461908b2578955045efde733719d62f2b649c404189a09a632d245b445c9c975"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72a860c215e26192379f57cae5ab12b168b75db8271f111019509a1196dfc780"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc14be025665dba6202b6a71cfcdb53210cc498e50068bc088076624471f8bb9"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:8af740fc2711ad85f1a5c034a435782fbd5b5f8314c9a3ef071424a8158d7f6b"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:841cd8233cbd2111a0ef0a522ce016357c5e3aff8a8ce92bcfa14cef890d698f"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed1c46fb119f1b59304b5ec89f834f07124cd23ae5b74288e364477641060ff"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84f8ae3e09a34f35c18fa57f015cc394bd1389bce02503fb30c394d04ee6b938"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62360cb771707cb70a6fd114b9871d20d7dd2163a0feafe43fd115cfe4fe845e"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:23fb25a9f0a1ca1f24c0a371523546366bb642397c94ab45ad3aedf2941cec6a"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0ba0d15164eae3d878260d4c4df859bbdc6466e9e6689c344a13334f988bb53"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5d20003b635fc6ae3f96d7260281dfaf1894fc3aa24d1888a9b2628e97c241e5"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0175d745d9e85c40dcc51c8f88c74bfbaef9e7afeeeb9d03c37977270303064c"}, + {file = "aiohttp-3.8.5-cp311-cp311-win32.whl", hash = "sha256:2e1b1e51b0774408f091d268648e3d57f7260c1682e7d3a63cb00d22d71bb945"}, + {file = "aiohttp-3.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:043d2299f6dfdc92f0ac5e995dfc56668e1587cea7f9aa9d8a78a1b6554e5755"}, + {file = "aiohttp-3.8.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cae533195e8122584ec87531d6df000ad07737eaa3c81209e85c928854d2195c"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f21e83f355643c345177a5d1d8079f9f28b5133bcd154193b799d380331d5d3"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a75ef35f2df54ad55dbf4b73fe1da96f370e51b10c91f08b19603c64004acc"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e2e9839e14dd5308ee773c97115f1e0a1cb1d75cbeeee9f33824fa5144c7634"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44e65da1de4403d0576473e2344828ef9c4c6244d65cf4b75549bb46d40b8dd"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d847e4cde6ecc19125ccbc9bfac4a7ab37c234dd88fbb3c5c524e8e14da543"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:c7a815258e5895d8900aec4454f38dca9aed71085f227537208057853f9d13f2"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:8b929b9bd7cd7c3939f8bcfffa92fae7480bd1aa425279d51a89327d600c704d"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5db3a5b833764280ed7618393832e0853e40f3d3e9aa128ac0ba0f8278d08649"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:a0215ce6041d501f3155dc219712bc41252d0ab76474615b9700d63d4d9292af"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:fd1ed388ea7fbed22c4968dd64bab0198de60750a25fe8c0c9d4bef5abe13824"}, + {file = "aiohttp-3.8.5-cp36-cp36m-win32.whl", hash = "sha256:6e6783bcc45f397fdebc118d772103d751b54cddf5b60fbcc958382d7dd64f3e"}, + {file = "aiohttp-3.8.5-cp36-cp36m-win_amd64.whl", hash = 
"sha256:b5411d82cddd212644cf9360879eb5080f0d5f7d809d03262c50dad02f01421a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:01d4c0c874aa4ddfb8098e85d10b5e875a70adc63db91f1ae65a4b04d3344cda"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5980a746d547a6ba173fd5ee85ce9077e72d118758db05d229044b469d9029a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a482e6da906d5e6e653be079b29bc173a48e381600161c9932d89dfae5942ef"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80bd372b8d0715c66c974cf57fe363621a02f359f1ec81cba97366948c7fc873"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1161b345c0a444ebcf46bf0a740ba5dcf50612fd3d0528883fdc0eff578006a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd56db019015b6acfaaf92e1ac40eb8434847d9bf88b4be4efe5bfd260aee692"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:153c2549f6c004d2754cc60603d4668899c9895b8a89397444a9c4efa282aaf4"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4a01951fabc4ce26ab791da5f3f24dca6d9a6f24121746eb19756416ff2d881b"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bfb9162dcf01f615462b995a516ba03e769de0789de1cadc0f916265c257e5d8"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:7dde0009408969a43b04c16cbbe252c4f5ef4574ac226bc8815cd7342d2028b6"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4149d34c32f9638f38f544b3977a4c24052042affa895352d3636fa8bffd030a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-win32.whl", hash = "sha256:68c5a82c8779bdfc6367c967a4a1b2aa52cd3595388bf5961a62158ee8a59e22"}, + {file = "aiohttp-3.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2cf57fb50be5f52bda004b8893e63b48530ed9f0d6c96c84620dc92fe3cd9b9d"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eca4bf3734c541dc4f374ad6010a68ff6c6748f00451707f39857f429ca36ced"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1274477e4c71ce8cfe6c1ec2f806d57c015ebf84d83373676036e256bc55d690"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c543e54710d6158fc6f439296c7865b29e0b616629767e685a7185fab4a6b9"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:910bec0c49637d213f5d9877105d26e0c4a4de2f8b1b29405ff37e9fc0ad52b8"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5443910d662db951b2e58eb70b0fbe6b6e2ae613477129a5805d0b66c54b6cb7"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e460be6978fc24e3df83193dc0cc4de46c9909ed92dd47d349a452ef49325b7"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb1558def481d84f03b45888473fc5a1f35747b5f334ef4e7a571bc0dfcb11f8"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34dd0c107799dcbbf7d48b53be761a013c0adf5571bf50c4ecad5643fe9cfcd0"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aa1990247f02a54185dc0dff92a6904521172a22664c863a03ff64c42f9b5410"}, + {file = 
"aiohttp-3.8.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0e584a10f204a617d71d359fe383406305a4b595b333721fa50b867b4a0a1548"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a3cf433f127efa43fee6b90ea4c6edf6c4a17109d1d037d1a52abec84d8f2e42"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c11f5b099adafb18e65c2c997d57108b5bbeaa9eeee64a84302c0978b1ec948b"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:84de26ddf621d7ac4c975dbea4c945860e08cccde492269db4e1538a6a6f3c35"}, + {file = "aiohttp-3.8.5-cp38-cp38-win32.whl", hash = "sha256:ab88bafedc57dd0aab55fa728ea10c1911f7e4d8b43e1d838a1739f33712921c"}, + {file = "aiohttp-3.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:5798a9aad1879f626589f3df0f8b79b3608a92e9beab10e5fda02c8a2c60db2e"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a6ce61195c6a19c785df04e71a4537e29eaa2c50fe745b732aa937c0c77169f3"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:773dd01706d4db536335fcfae6ea2440a70ceb03dd3e7378f3e815b03c97ab51"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f83a552443a526ea38d064588613aca983d0ee0038801bc93c0c916428310c28"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f7372f7341fcc16f57b2caded43e81ddd18df53320b6f9f042acad41f8e049a"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea353162f249c8097ea63c2169dd1aa55de1e8fecbe63412a9bc50816e87b761"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d47ae48db0b2dcf70bc8a3bc72b3de86e2a590fc299fdbbb15af320d2659de"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d827176898a2b0b09694fbd1088c7a31836d1a505c243811c87ae53a3f6273c1"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3562b06567c06439d8b447037bb655ef69786c590b1de86c7ab81efe1c9c15d8"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4e874cbf8caf8959d2adf572a78bba17cb0e9d7e51bb83d86a3697b686a0ab4d"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6809a00deaf3810e38c628e9a33271892f815b853605a936e2e9e5129762356c"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:33776e945d89b29251b33a7e7d006ce86447b2cfd66db5e5ded4e5cd0340585c"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eaeed7abfb5d64c539e2db173f63631455f1196c37d9d8d873fc316470dfbacd"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e91d635961bec2d8f19dfeb41a539eb94bd073f075ca6dae6c8dc0ee89ad6f91"}, + {file = "aiohttp-3.8.5-cp39-cp39-win32.whl", hash = "sha256:00ad4b6f185ec67f3e6562e8a1d2b69660be43070bd0ef6fcec5211154c7df67"}, + {file = "aiohttp-3.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:c0a9034379a37ae42dea7ac1e048352d96286626251862e448933c0f59cbd79c"}, + {file = "aiohttp-3.8.5.tar.gz", hash = "sha256:b9552ec52cc147dbf1944ac7ac98af7602e51ea2dcd076ed194ca3c0d1c7d0bc"}, +] [package.dependencies] aiosignal = ">=1.1.2" @@ -24,15 +117,18 @@ multidict = ">=4.5,<7.0" yarl = ">=1.0,<2.0" [package.extras] -speedups = ["aiodns", "brotli", "cchardet"] +speedups = ["Brotli", "aiodns", "cchardet"] [[package]] name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered 
asynchronous callbacks" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] [package.dependencies] frozenlist = ">=1.1.0" @@ -41,17 +137,23 @@ frozenlist = ">=1.1.0" name = "alabaster" version = "0.7.13" description = "A configurable sidebar-enabled Sphinx theme" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3"}, + {file = "alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2"}, +] [[package]] name = "argcomplete" version = "3.1.1" description = "Bash tab completion for argparse" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "argcomplete-3.1.1-py3-none-any.whl", hash = "sha256:35fa893a88deea85ea7b20d241100e64516d6af6d7b0ae2bed1d263d26f70948"}, + {file = "argcomplete-3.1.1.tar.gz", hash = "sha256:6c4c563f14f01440aaffa3eae13441c5db2357b5eec639abe7c0b15334627dff"}, +] [package.extras] test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] @@ -60,40 +162,52 @@ test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, + {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, +] [[package]] name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] [package.extras] -cov = ["attrs", "coverage[toml] (>=5.3)"] -dev = ["attrs", "pre-commit"] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] -tests = ["attrs", "zope-interface"] -tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] [[package]] name = "babel" version = "2.12.1" description = "Internationalization utilities" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "Babel-2.12.1-py3-none-any.whl", hash = "sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610"}, + {file = "Babel-2.12.1.tar.gz", hash = "sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455"}, +] [[package]] name = "bandit" version = "1.7.5" description = "Security oriented static analyser for python code." 
-category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"}, + {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"}, +] [package.dependencies] colorama = {version = ">=0.3.9", markers = "platform_system == \"Windows\""} @@ -105,15 +219,18 @@ stevedore = ">=1.20.0" [package.extras] test = ["beautifulsoup4 (>=4.8.0)", "coverage (>=4.5.4)", "fixtures (>=3.0.0)", "flake8 (>=4.0.0)", "pylint (==1.9.4)", "stestr (>=2.5.0)", "testscenarios (>=0.5.0)", "testtools (>=2.3.0)", "tomli (>=1.1.0)"] toml = ["tomli (>=1.1.0)"] -yaml = ["pyyaml"] +yaml = ["PyYAML"] [[package]] name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" -category = "dev" optional = false python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] [package.dependencies] soupsieve = ">1.2" @@ -126,9 +243,22 @@ lxml = ["lxml"] name = "black" version = "22.12.0" description = "The uncompromising code formatter." -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "black-22.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eedd20838bd5d75b80c9f5487dbcb06836a43833a37846cf1d8c1cc01cef59d"}, + {file = "black-22.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:159a46a4947f73387b4d83e87ea006dbb2337eab6c879620a3ba52699b1f4351"}, + {file = "black-22.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d30b212bffeb1e252b31dd269dfae69dd17e06d92b87ad26e23890f3efea366f"}, + {file = "black-22.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:7412e75863aa5c5411886804678b7d083c7c28421210180d67dfd8cf1221e1f4"}, + {file = "black-22.12.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c116eed0efb9ff870ded8b62fe9f28dd61ef6e9ddd28d83d7d264a38417dcee2"}, + {file = "black-22.12.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1f58cbe16dfe8c12b7434e50ff889fa479072096d79f0a7f25e4ab8e94cd8350"}, + {file = "black-22.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77d86c9f3db9b1bf6761244bc0b3572a546f5fe37917a044e02f3166d5aafa7d"}, + {file = "black-22.12.0-cp38-cp38-win_amd64.whl", hash = "sha256:82d9fe8fee3401e02e79767016b4907820a7dc28d70d137eb397b92ef3cc5bfc"}, + {file = "black-22.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101c69b23df9b44247bd88e1d7e90154336ac4992502d4197bdac35dd7ee3320"}, + {file = "black-22.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:559c7a1ba9a006226f09e4916060982fd27334ae1998e7a38b3f33a37f7a2148"}, + {file = "black-22.12.0-py3-none-any.whl", hash = "sha256:436cc9167dd28040ad90d3b404aec22cedf24a6e4d7de221bec2730ec0c97bcf"}, + {file = "black-22.12.0.tar.gz", hash = "sha256:229351e5a18ca30f447bf724d007f890f97e13af070bb6ad4c0a441cd7596a2f"}, +] [package.dependencies] click = ">=8.0.0" @@ -148,33 +278,118 @@ uvloop = ["uvloop (>=0.15.2)"] name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." 
-category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, +] [[package]] name = "cfgv" version = "3.4.0" description = "Validate configuration and produce human readable error messages." -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] [[package]] name = "charset-normalizer" version = "3.2.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", 
hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, + {file = 
"charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = 
"sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, + {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, +] [[package]] name = "click" version = "8.1.7" description = "Composable command line interface toolkit" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} @@ -183,17 +398,23 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
-category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] [[package]] name = "colorlog" version = "6.7.0" description = "Add colours to the output of Python's logging module." -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "colorlog-6.7.0-py2.py3-none-any.whl", hash = "sha256:0d33ca236784a1ba3ff9c532d4964126d8a2c44f1f0cb1d2b0728196f512f662"}, + {file = "colorlog-6.7.0.tar.gz", hash = "sha256:bd94bd21c1e13fac7bd3153f4bc3a7dc0eb0974b8bc2fdf1a989e474f6e582e5"}, +] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} @@ -205,97 +426,112 @@ development = ["black", "flake8", "mypy", "pytest", "types-colorama"] name = "coverage" version = "7.3.1" description = "Code coverage measurement for Python" -category = "dev" optional = false python-versions = ">=3.8" - -[package.dependencies] -tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} - -[package.extras] -toml = ["tomli"] - -[[package]] -name = "darglint" -version = "1.8.1" -description = "A utility for ensuring Google-style docstrings stay up to date with the source code." -category = "dev" -optional = false -python-versions = ">=3.6,<4.0" - -[[package]] -name = "datasets" -version = "2.14.5" -description = "HuggingFace community-driven open-source library of datasets" -category = "main" -optional = false -python-versions = ">=3.8.0" - -[package.dependencies] -aiohttp = "*" -dill = ">=0.3.0,<0.3.8" -fsspec = {version = ">=2023.1.0,<2023.9.0", extras = ["http"]} -huggingface-hub = ">=0.14.0,<1.0.0" -multiprocess = "*" -numpy = ">=1.17" -packaging = "*" -pandas = "*" -pyarrow = ">=8.0.0" -pyyaml = ">=5.1" -requests = ">=2.19.0" -tqdm = ">=4.62.1" -xxhash = "*" - -[package.extras] -apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"] -audio = ["librosa", "soundfile (>=0.12.1)"] -benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "black (>=23.1,<24.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "ruff (>=0.0.241)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"] -docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"] -jax = ["jax (>=0.2.8,!=0.3.2,<=0.3.25)", "jaxlib (>=0.1.65,<=0.3.25)"] -metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] -quality = ["black (>=23.1,<24.0)", "pyyaml (>=5.3.1)", "ruff (>=0.0.241)"] -s3 = ["s3fs"] -tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", 
"tensorflow-macos"] -tensorflow_gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"] -tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"] -torch = ["torch"] -vision = ["Pillow (>=6.2.1)"] - -[[package]] -name = "dill" -version = "0.3.7" -description = "serialize all of Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -graph = ["objgraph (>=1.7.2)"] - -[[package]] -name = "distlib" +files = [ + {file = "coverage-7.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cd0f7429ecfd1ff597389907045ff209c8fdb5b013d38cfa7c60728cb484b6e3"}, + {file = "coverage-7.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:966f10df9b2b2115da87f50f6a248e313c72a668248be1b9060ce935c871f276"}, + {file = "coverage-7.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0575c37e207bb9b98b6cf72fdaaa18ac909fb3d153083400c2d48e2e6d28bd8e"}, + {file = "coverage-7.3.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:245c5a99254e83875c7fed8b8b2536f040997a9b76ac4c1da5bff398c06e860f"}, + {file = "coverage-7.3.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c96dd7798d83b960afc6c1feb9e5af537fc4908852ef025600374ff1a017392"}, + {file = "coverage-7.3.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:de30c1aa80f30af0f6b2058a91505ea6e36d6535d437520067f525f7df123887"}, + {file = "coverage-7.3.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:50dd1e2dd13dbbd856ffef69196781edff26c800a74f070d3b3e3389cab2600d"}, + {file = "coverage-7.3.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9c0c19f70d30219113b18fe07e372b244fb2a773d4afde29d5a2f7930765136"}, + {file = "coverage-7.3.1-cp310-cp310-win32.whl", hash = "sha256:770f143980cc16eb601ccfd571846e89a5fe4c03b4193f2e485268f224ab602f"}, + {file = "coverage-7.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:cdd088c00c39a27cfa5329349cc763a48761fdc785879220d54eb785c8a38520"}, + {file = "coverage-7.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:74bb470399dc1989b535cb41f5ca7ab2af561e40def22d7e188e0a445e7639e3"}, + {file = "coverage-7.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:025ded371f1ca280c035d91b43252adbb04d2aea4c7105252d3cbc227f03b375"}, + {file = "coverage-7.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6191b3a6ad3e09b6cfd75b45c6aeeffe7e3b0ad46b268345d159b8df8d835f9"}, + {file = "coverage-7.3.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7eb0b188f30e41ddd659a529e385470aa6782f3b412f860ce22b2491c89b8593"}, + {file = "coverage-7.3.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c8f0df9dfd8ff745bccff75867d63ef336e57cc22b2908ee725cc552689ec8"}, + {file = "coverage-7.3.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7eb3cd48d54b9bd0e73026dedce44773214064be93611deab0b6a43158c3d5a0"}, + {file = "coverage-7.3.1-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:ac3c5b7e75acac31e490b7851595212ed951889918d398b7afa12736c85e13ce"}, + {file = "coverage-7.3.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5b4ee7080878077af0afa7238df1b967f00dc10763f6e1b66f5cced4abebb0a3"}, + {file = "coverage-7.3.1-cp311-cp311-win32.whl", hash = "sha256:229c0dd2ccf956bf5aeede7e3131ca48b65beacde2029f0361b54bf93d36f45a"}, + {file = "coverage-7.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c6f55d38818ca9596dc9019eae19a47410d5322408140d9a0076001a3dcb938c"}, + {file = "coverage-7.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5289490dd1c3bb86de4730a92261ae66ea8d44b79ed3cc26464f4c2cde581fbc"}, + {file = "coverage-7.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ca833941ec701fda15414be400c3259479bfde7ae6d806b69e63b3dc423b1832"}, + {file = "coverage-7.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd694e19c031733e446c8024dedd12a00cda87e1c10bd7b8539a87963685e969"}, + {file = "coverage-7.3.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aab8e9464c00da5cb9c536150b7fbcd8850d376d1151741dd0d16dfe1ba4fd26"}, + {file = "coverage-7.3.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87d38444efffd5b056fcc026c1e8d862191881143c3aa80bb11fcf9dca9ae204"}, + {file = "coverage-7.3.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8a07b692129b8a14ad7a37941a3029c291254feb7a4237f245cfae2de78de037"}, + {file = "coverage-7.3.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2829c65c8faaf55b868ed7af3c7477b76b1c6ebeee99a28f59a2cb5907a45760"}, + {file = "coverage-7.3.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1f111a7d85658ea52ffad7084088277135ec5f368457275fc57f11cebb15607f"}, + {file = "coverage-7.3.1-cp312-cp312-win32.whl", hash = "sha256:c397c70cd20f6df7d2a52283857af622d5f23300c4ca8e5bd8c7a543825baa5a"}, + {file = "coverage-7.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:5ae4c6da8b3d123500f9525b50bf0168023313963e0e2e814badf9000dd6ef92"}, + {file = "coverage-7.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca70466ca3a17460e8fc9cea7123c8cbef5ada4be3140a1ef8f7b63f2f37108f"}, + {file = "coverage-7.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f2781fd3cabc28278dc982a352f50c81c09a1a500cc2086dc4249853ea96b981"}, + {file = "coverage-7.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6407424621f40205bbe6325686417e5e552f6b2dba3535dd1f90afc88a61d465"}, + {file = "coverage-7.3.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:04312b036580ec505f2b77cbbdfb15137d5efdfade09156961f5277149f5e344"}, + {file = "coverage-7.3.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9ad38204887349853d7c313f53a7b1c210ce138c73859e925bc4e5d8fc18e7"}, + {file = "coverage-7.3.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:53669b79f3d599da95a0afbef039ac0fadbb236532feb042c534fbb81b1a4e40"}, + {file = "coverage-7.3.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:614f1f98b84eb256e4f35e726bfe5ca82349f8dfa576faabf8a49ca09e630086"}, + {file = "coverage-7.3.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f1a317fdf5c122ad642db8a97964733ab7c3cf6009e1a8ae8821089993f175ff"}, + {file = "coverage-7.3.1-cp38-cp38-win32.whl", hash = "sha256:defbbb51121189722420a208957e26e49809feafca6afeef325df66c39c4fdb3"}, + {file = 
"coverage-7.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:f4f456590eefb6e1b3c9ea6328c1e9fa0f1006e7481179d749b3376fc793478e"}, + {file = "coverage-7.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f12d8b11a54f32688b165fd1a788c408f927b0960984b899be7e4c190ae758f1"}, + {file = "coverage-7.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f09195dda68d94a53123883de75bb97b0e35f5f6f9f3aa5bf6e496da718f0cb6"}, + {file = "coverage-7.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6601a60318f9c3945be6ea0f2a80571f4299b6801716f8a6e4846892737ebe4"}, + {file = "coverage-7.3.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07d156269718670d00a3b06db2288b48527fc5f36859425ff7cec07c6b367745"}, + {file = "coverage-7.3.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:636a8ac0b044cfeccae76a36f3b18264edcc810a76a49884b96dd744613ec0b7"}, + {file = "coverage-7.3.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5d991e13ad2ed3aced177f524e4d670f304c8233edad3210e02c465351f785a0"}, + {file = "coverage-7.3.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:586649ada7cf139445da386ab6f8ef00e6172f11a939fc3b2b7e7c9082052fa0"}, + {file = "coverage-7.3.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4aba512a15a3e1e4fdbfed2f5392ec221434a614cc68100ca99dcad7af29f3f8"}, + {file = "coverage-7.3.1-cp39-cp39-win32.whl", hash = "sha256:6bc6f3f4692d806831c136c5acad5ccedd0262aa44c087c46b7101c77e139140"}, + {file = "coverage-7.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:553d7094cb27db58ea91332e8b5681bac107e7242c23f7629ab1316ee73c4981"}, + {file = "coverage-7.3.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:220eb51f5fb38dfdb7e5d54284ca4d0cd70ddac047d750111a68ab1798945194"}, + {file = "coverage-7.3.1.tar.gz", hash = "sha256:6cb7fe1581deb67b782c153136541e20901aa312ceedaf1467dcb35255787952"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "darglint" +version = "1.8.1" +description = "A utility for ensuring Google-style docstrings stay up to date with the source code." 
+optional = false +python-versions = ">=3.6,<4.0" +files = [ + {file = "darglint-1.8.1-py3-none-any.whl", hash = "sha256:5ae11c259c17b0701618a20c3da343a3eb98b3bc4b5a83d31cdd94f5ebdced8d"}, + {file = "darglint-1.8.1.tar.gz", hash = "sha256:080d5106df149b199822e7ee7deb9c012b49891538f14a11be681044f0bb20da"}, +] + +[[package]] +name = "distlib" version = "0.3.7" description = "Distribution utilities" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"}, + {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, +] [[package]] name = "docutils" version = "0.20.1" description = "Docutils -- Python Documentation Utilities" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, + {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, +] [[package]] name = "dparse" version = "0.6.3" description = "A parser for Python dependency files" -category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "dparse-0.6.3-py3-none-any.whl", hash = "sha256:0d8fe18714056ca632d98b24fbfc4e9791d4e47065285ab486182288813a5318"}, + {file = "dparse-0.6.3.tar.gz", hash = "sha256:27bb8b4bcaefec3997697ba3f6e06b2447200ba273c0b085c3d012a04571b528"}, +] [package.dependencies] packaging = "*" @@ -309,9 +545,12 @@ pipenv = ["pipenv (<=2022.12.19)"] name = "exceptiongroup" version = "1.1.3" description = "Backport of PEP 654 (exception groups)" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, + {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, +] [package.extras] test = ["pytest (>=6)"] @@ -320,9 +559,12 @@ test = ["pytest (>=6)"] name = "filelock" version = "3.12.3" description = "A platform independent file lock." -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"}, + {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"}, +] [package.dependencies] typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""} @@ -335,9 +577,12 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pyt name = "flake8" version = "6.1.0" description = "the modular source code checker: pep8 pyflakes and co" -category = "dev" optional = false python-versions = ">=3.8.1" +files = [ + {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, + {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, +] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" @@ -348,9 +593,12 @@ pyflakes = ">=3.1.0,<3.2.0" name = "flake8-bandit" version = "4.1.1" description = "Automated security testing with bandit and flake8." 
-category = "dev" optional = false python-versions = ">=3.6" +files = [ + {file = "flake8_bandit-4.1.1-py3-none-any.whl", hash = "sha256:4c8a53eb48f23d4ef1e59293657181a3c989d0077c9952717e98a0eace43e06d"}, + {file = "flake8_bandit-4.1.1.tar.gz", hash = "sha256:068e09287189cbfd7f986e92605adea2067630b75380c6b5733dab7d87f9a84e"}, +] [package.dependencies] bandit = ">=1.7.3" @@ -360,9 +608,12 @@ flake8 = ">=5.0.0" name = "flake8-bugbear" version = "23.7.10" description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle." -category = "dev" optional = false python-versions = ">=3.8.1" +files = [ + {file = "flake8-bugbear-23.7.10.tar.gz", hash = "sha256:0ebdc7d8ec1ca8bd49347694562381f099f4de2f8ec6bda7a7dca65555d9e0d4"}, + {file = "flake8_bugbear-23.7.10-py3-none-any.whl", hash = "sha256:d99d005114020fbef47ed5e4aebafd22f167f9a0fbd0d8bf3c9e90612cb25c34"}, +] [package.dependencies] attrs = ">=19.2.0" @@ -375,9 +626,12 @@ dev = ["coverage", "hypothesis", "hypothesmith (>=0.2)", "pre-commit", "pytest", name = "flake8-docstrings" version = "1.7.0" description = "Extension for flake8 which uses pydocstyle to check docstrings" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "flake8_docstrings-1.7.0-py2.py3-none-any.whl", hash = "sha256:51f2344026da083fc084166a9353f5082b01f72901df422f74b4d953ae88ac75"}, + {file = "flake8_docstrings-1.7.0.tar.gz", hash = "sha256:4c8cc748dc16e6869728699e5d0d685da9a10b0ea718e090b1ba088e67a941af"}, +] [package.dependencies] flake8 = ">=3" @@ -387,9 +641,12 @@ pydocstyle = ">=2.1" name = "flake8-rst-docstrings" version = "0.3.0" description = "Python docstring reStructuredText (RST) validator for flake8" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "flake8-rst-docstrings-0.3.0.tar.gz", hash = "sha256:d1ce22b4bd37b73cd86b8d980e946ef198cfcc18ed82fedb674ceaa2f8d1afa4"}, + {file = "flake8_rst_docstrings-0.3.0-py3-none-any.whl", hash = "sha256:f8c3c6892ff402292651c31983a38da082480ad3ba253743de52989bdc84ca1c"}, +] [package.dependencies] flake8 = ">=3" @@ -403,17 +660,82 @@ develop = ["build", "twine"] name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, + {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, + {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, + {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = 
"sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, + {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, + {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, + {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, + {file = 
"frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, + {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, + {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, + {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, +] [[package]] name = "fsspec" version = "2023.6.0" description = "File-system specification" -category = "main" optional = false python-versions = ">=3.8" +files = [ + {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, + {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, +] [package.dependencies] aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} @@ -447,9 +769,12 @@ tqdm = ["tqdm"] name = "furo" version = "2023.8.19" description = "A clean customisable Sphinx documentation theme." 
-category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "furo-2023.8.19-py3-none-any.whl", hash = "sha256:12f99f87a1873b6746228cfde18f77244e6c1ffb85d7fed95e638aae70d80590"}, + {file = "furo-2023.8.19.tar.gz", hash = "sha256:e671ee638ab3f1b472f4033b0167f502ab407830e0db0f843b1c1028119c9cd1"}, +] [package.dependencies] beautifulsoup4 = "*" @@ -461,9 +786,12 @@ sphinx-basic-ng = "*" name = "gitdb" version = "4.0.10" description = "Git Object Database" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, + {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, +] [package.dependencies] smmap = ">=3.0.1,<6" @@ -472,9 +800,12 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.35" description = "GitPython is a Python library used to interact with Git repositories" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.35-py3-none-any.whl", hash = "sha256:c19b4292d7a1d3c0f653858db273ff8a6614100d1eb1528b014ec97286193c09"}, + {file = "GitPython-3.1.35.tar.gz", hash = "sha256:9cbefbd1789a5fe9bcf621bb34d3f441f3a90c8461d377f84eda73e721d9b06b"}, +] [package.dependencies] gitdb = ">=4.0.1,<5" @@ -483,9 +814,12 @@ gitdb = ">=4.0.1,<5" name = "huggingface-hub" version = "0.16.4" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" -category = "main" optional = false python-versions = ">=3.7.0" +files = [ + {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:0d3df29932f334fead024afc7cb4cc5149d955238b8b5e42dcf9740d6995a349"}, + {file = "huggingface_hub-0.16.4.tar.gz", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"}, +] [package.dependencies] filelock = "*" @@ -497,24 +831,27 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "jinja2", "mypy (==0.982)", "numpy", "pillow", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-pyyaml", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "jinja2", "mypy (==0.982)", "numpy", "pillow", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-pyyaml", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", 
"types-tqdm", "types-urllib3", "urllib3 (<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] inference = ["aiohttp", "pydantic"] quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"] tensorflow = ["graphviz", "pydot", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "aiohttp", "gradio", "jedi", "jinja2", "numpy", "pillow", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] torch = ["torch"] -typing = ["pydantic", "types-pyyaml", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +typing = ["pydantic", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] [[package]] name = "identify" version = "2.5.27" description = "File identification library for Python" -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "identify-2.5.27-py2.py3-none-any.whl", hash = "sha256:fdb527b2dfe24602809b2201e033c2a113d7bdf716db3ca8e3243f735dcecaba"}, + {file = "identify-2.5.27.tar.gz", hash = "sha256:287b75b04a0e22d727bc9a41f0d4f3c1bcada97490fa6eabb5b28f0e9097e733"}, +] [package.extras] license = ["ukkonen"] @@ -523,25 +860,34 @@ license = ["ukkonen"] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] [[package]] name = "imagesize" version = "1.4.1" description = "Getting image size from png/jpeg/jpeg2000/gif file" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, + {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, +] [[package]] name = "importlib-metadata" version = "6.8.0" description = "Read metadata from Python packages" -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-6.8.0-py3-none-any.whl", hash = "sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb"}, + {file = "importlib_metadata-6.8.0.tar.gz", hash = "sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743"}, +] [package.dependencies] zipp = ">=0.5" @@ -555,17 +901,23 @@ testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] [[package]] name = "isort" version = "5.12.0" description = "A Python utility / library to sort Python imports." 
-category = "dev" optional = false python-versions = ">=3.8.0" +files = [ + {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, + {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, +] [package.extras] colors = ["colorama (>=0.4.3)"] @@ -577,9 +929,12 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] [package.dependencies] MarkupSafe = ">=2.0" @@ -591,9 +946,12 @@ i18n = ["Babel (>=2.7)"] name = "lightning-utilities" version = "0.9.0" description = "PyTorch Lightning Sample project." -category = "main" optional = false python-versions = ">=3.7" +files = [ + {file = "lightning-utilities-0.9.0.tar.gz", hash = "sha256:efbf2c488c257f942abdfd06cf646fb84ca215a9663b60081811e22a15ee033b"}, + {file = "lightning_utilities-0.9.0-py3-none-any.whl", hash = "sha256:918dd90c775719e3855631db6282ad75c14da4c5727c4cebdd1589d865fad03d"}, +] [package.dependencies] packaging = ">=17.1" @@ -608,9 +966,12 @@ typing = ["mypy (>=1.0.0)"] name = "livereload" version = "2.6.3" description = "Python LiveReload is an awesome tool for web developers" -category = "dev" optional = false python-versions = "*" +files = [ + {file = "livereload-2.6.3-py2.py3-none-any.whl", hash = "sha256:ad4ac6f53b2d62bb6ce1a5e6e96f1f00976a32348afedcb4b6d68df2a1d346e4"}, + {file = "livereload-2.6.3.tar.gz", hash = "sha256:776f2f865e59fde56490a56bcc6773b6917366bce0c267c60ee8aaf1a0959869"}, +] [package.dependencies] six = "*" @@ -620,1615 +981,33 @@ tornado = {version = "*", markers = "python_version > \"2.7\""} name = "markdown-it-py" version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" -category = "dev" optional = false python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] [package.dependencies] mdurl = ">=0.1,<1.0" [package.extras] benchmarking = ["psutil", "pytest", "pytest-benchmark"] -code_style = ["pre-commit (>=3.0,<4.0)"] +code-style = ["pre-commit (>=3.0,<4.0)"] compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] linkify = ["linkify-it-py (>=1,<3)"] plugins = ["mdit-py-plugins"] profiling = ["gprof2dot"] -rtd = ["jupyter-sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-book-theme", "sphinx-copybutton", "sphinx-design"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] [[package]] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "mccabe" -version = "0.7.0" -description = "McCabe checker, plugin for flake8" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "mdit-py-plugins" -version = "0.4.0" -description = "Collection of plugins for markdown-it-py" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -markdown-it-py = ">=1.0.0,<4.0.0" - -[package.extras] -code_style = ["pre-commit"] -rtd = ["myst-parser", "sphinx-book-theme"] -testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] - -[[package]] -name = "mdurl" -version = "0.1.2" -description = "Markdown URL utilities" -category = "dev" optional = false python-versions = ">=3.7" - -[[package]] -name = "mpmath" -version = "1.3.0" -description = "Python library for arbitrary-precision floating-point arithmetic" -category = "main" -optional = false -python-versions = "*" - -[package.extras] -develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] -docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] -tests = ["pytest (>=4.6)"] - -[[package]] -name = "multidict" -version = "6.0.4" -description = "multidict implementation" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "multiprocess" -version = "0.70.15" -description = "better multiprocessing and multithreading in Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -dill = ">=0.3.7" - -[[package]] -name = "mypy" -version = "1.5.1" -description = "Optional static typing for Python" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -mypy-extensions = ">=1.0.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = ">=4.1.0" - -[package.extras] -dmypy = ["psutil (>=4.0)"] -install-types = ["pip"] -reports = ["lxml"] - -[[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy type checker." 
-category = "dev" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "myst-parser" -version = "2.0.0" -description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser," -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -docutils = ">=0.16,<0.21" -jinja2 = "*" -markdown-it-py = ">=3.0,<4.0" -mdit-py-plugins = ">=0.4,<1.0" -pyyaml = "*" -sphinx = ">=6,<8" - -[package.extras] -code_style = ["pre-commit (>=3.0,<4.0)"] -linkify = ["linkify-it-py (>=2.0,<3.0)"] -rtd = ["ipython", "pydata-sphinx-theme (==v0.13.0rc4)", "sphinx-autodoc2 (>=0.4.2,<0.5.0)", "sphinx-book-theme (==1.0.0rc2)", "sphinx-copybutton", "sphinx-design2", "sphinx-pyscript", "sphinx-tippy (>=0.3.1)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.8.2,<0.9.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] -testing = ["beautifulsoup4", "coverage", "pytest (>=7,<8)", "pytest-cov", "pytest-param-files (>=0.3.4,<0.4.0)", "pytest-regressions", "sphinx-pytest"] -testing-docutils = ["pygments", "pytest (>=7,<8)", "pytest-param-files (>=0.3.4,<0.4.0)"] - -[[package]] -name = "networkx" -version = "3.1" -description = "Python package for creating and manipulating graphs and networks" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"] -developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"] -doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"] -extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"] -test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] - -[[package]] -name = "nodeenv" -version = "1.8.0" -description = "Node.js virtual environment builder" -category = "dev" -optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" - -[[package]] -name = "nox" -version = "2023.4.22" -description = "Flexible test automation." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -argcomplete = ">=1.9.4,<4.0" -colorlog = ">=2.6.1,<7.0.0" -packaging = ">=20.9" -virtualenv = ">=14" - -[package.extras] -tox_to_nox = ["jinja2", "tox (<4)"] - -[[package]] -name = "nox-poetry" -version = "1.0.3" -description = "nox-poetry" -category = "dev" -optional = false -python-versions = ">=3.7,<4.0" - -[package.dependencies] -nox = ">=2020.8.22" -packaging = ">=20.9" -tomlkit = ">=0.7" - -[[package]] -name = "numpy" -version = "1.25.2" -description = "Fundamental package for array computing in Python" -category = "main" -optional = false -python-versions = ">=3.9" - -[[package]] -name = "packaging" -version = "21.3" -description = "Core utilities for Python packages" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" - -[[package]] -name = "pandas" -version = "2.1.0" -description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" -optional = false -python-versions = ">=3.9" - -[package.dependencies] -numpy = [ - {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, -] -python-dateutil = ">=2.8.2" -pytz = ">=2020.1" -tzdata = ">=2022.1" - -[package.extras] -all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] -aws = ["s3fs (>=2022.05.0)"] -clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] -compression = ["zstandard (>=0.17.0)"] -computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] -consortium-standard = ["dataframe-api-compat (>=0.1.7)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] -feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2022.05.0)"] -gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] -hdf5 = ["tables (>=3.7.0)"] -html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] -mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] -parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] -plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] -spss = ["pyreadstat (>=1.1.5)"] -sql-other = ["SQLAlchemy (>=1.4.36)"] -test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.8.0)"] - -[[package]] -name = "pathspec" -version = "0.11.2" -description = "Utility library for gitignore style pattern matching of file paths." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "pbr" -version = "5.11.1" -description = "Python Build Reasonableness" -category = "dev" -optional = false -python-versions = ">=2.6" - -[[package]] -name = "pep8-naming" -version = "0.13.3" -description = "Check PEP-8 naming conventions, plugin for flake8" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -flake8 = ">=5.0.0" - -[[package]] -name = "platformdirs" -version = "3.10.0" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] - -[[package]] -name = "pluggy" -version = "1.3.0" -description = "plugin and hook calling mechanisms for python" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - -[[package]] -name = "pre-commit" -version = "3.4.0" -description = "A framework for managing and maintaining multi-language pre-commit hooks." -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -cfgv = ">=2.0.0" -identify = ">=1.0.0" -nodeenv = ">=0.11.1" -pyyaml = ">=5.1" -virtualenv = ">=20.10.0" - -[[package]] -name = "pre-commit-hooks" -version = "4.4.0" -description = "Some out-of-the-box hooks for pre-commit." -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -"ruamel.yaml" = ">=0.15" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} - -[[package]] -name = "pyarrow" -version = "13.0.0" -description = "Python library for Apache Arrow" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -numpy = ">=1.16.6" - -[[package]] -name = "pycodestyle" -version = "2.11.0" -description = "Python style guide checker" -category = "dev" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "pydocstyle" -version = "6.3.0" -description = "Python docstring style checker" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -snowballstemmer = ">=2.2.0" - -[package.extras] -toml = ["tomli (>=1.2.3)"] - -[[package]] -name = "pyflakes" -version = "3.1.0" -description = "passive checker of Python programs" -category = "dev" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "pygments" -version = "2.16.1" -description = "Pygments is a syntax highlighting package written in Python." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.extras] -plugins = ["importlib-metadata"] - -[[package]] -name = "pyparsing" -version = "3.1.1" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" -optional = false -python-versions = ">=3.6.8" - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - -[[package]] -name = "pytest" -version = "7.4.2" -description = "pytest: simple powerful testing with Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} - -[package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "python-dotenv" -version = "0.20.0" -description = "Read key-value pairs from a .env file and set them as environment variables" -category = "dev" -optional = false -python-versions = ">=3.5" - -[package.extras] -cli = ["click (>=5.0)"] - -[[package]] -name = "pytorch-lightning" -version = "2.0.8" -description = "PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate." -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -fsspec = {version = ">2021.06.0", extras = ["http"]} -lightning-utilities = ">=0.7.0" -numpy = ">=1.17.2" -packaging = ">=17.1" -PyYAML = ">=5.4" -torch = ">=1.11.0" -torchmetrics = ">=0.7.0" -tqdm = ">=4.57.0" -typing-extensions = ">=4.0.0" - -[package.extras] -all = ["deepspeed (>=0.8.2)", "gym[classic_control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.14.1)", "jsonargparse[signatures] (>=4.18.0,<4.23.0)", "lightning-utilities (>=0.7.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "rich (>=12.3.0)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.12.0)"] -deepspeed = ["deepspeed (>=0.8.2)"] -dev = ["cloudpickle (>=1.3)", "coverage (==7.3.0)", "deepspeed (>=0.8.2)", "fastapi (<0.100.0)", "gym[classic_control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.14.1)", "jsonargparse[signatures] (>=4.18.0,<4.23.0)", "lightning-utilities (>=0.7.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "onnx (<1.15.0)", "onnxruntime (<1.16.0)", "pandas (>1.0)", "protobuf (<=3.20.1)", "psutil (<5.9.6)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-forked (==1.4.0)", "pytest-rerunfailures (==10.3)", "rich (>=12.3.0)", "scikit-learn (>0.22.1)", "tensorboard (>=2.9.1)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.12.0)", "uvicorn (<0.23.3)"] -examples = ["gym[classic_control] (>=0.17.0)", "ipython[all] (<8.14.1)", "lightning-utilities (>=0.7.0)", "torchmetrics (>=0.10.0)", "torchvision (>=0.12.0)"] -extra = ["hydra-core (>=1.0.5)", "jsonargparse[signatures] (>=4.18.0,<4.23.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "rich (>=12.3.0)", "tensorboardX (>=2.2)"] -strategies = ["deepspeed (>=0.8.2)"] -test = ["cloudpickle 
(>=1.3)", "coverage (==7.3.0)", "fastapi (<0.100.0)", "onnx (<1.15.0)", "onnxruntime (<1.16.0)", "pandas (>1.0)", "protobuf (<=3.20.1)", "psutil (<5.9.6)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-forked (==1.4.0)", "pytest-rerunfailures (==10.3)", "scikit-learn (>0.22.1)", "tensorboard (>=2.9.1)", "uvicorn (<0.23.3)"] - -[[package]] -name = "pytz" -version = "2023.3.post1" -description = "World timezone definitions, modern and historical" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "pyupgrade" -version = "3.10.1" -description = "A tool to automatically upgrade syntax for newer versions." -category = "dev" -optional = false -python-versions = ">=3.8.1" - -[package.dependencies] -tokenize-rt = ">=5.2.0" - -[[package]] -name = "pyyaml" -version = "6.0.1" -description = "YAML parser and emitter for Python" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "regex" -version = "2023.8.8" -description = "Alternative regular expression module, to replace re." -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "requests" -version = "2.31.0" -description = "Python HTTP for Humans." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "restructuredtext-lint" -version = "1.4.0" -description = "reStructuredText linter" -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -docutils = ">=0.11,<1.0" - -[[package]] -name = "rich" -version = "13.5.2" -description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "dev" -optional = false -python-versions = ">=3.7.0" - -[package.dependencies] -markdown-it-py = ">=2.2.0" -pygments = ">=2.13.0,<3.0.0" - -[package.extras] -jupyter = ["ipywidgets (>=7.5.1,<9)"] - -[[package]] -name = "ruamel.yaml" -version = "0.17.32" -description = "ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order" -category = "dev" -optional = false -python-versions = ">=3" - -[package.dependencies] -"ruamel.yaml.clib" = {version = ">=0.2.7", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.12\""} - -[package.extras] -docs = ["ryd"] -jinja2 = ["ruamel.yaml.jinja2 (>=0.2)"] - -[[package]] -name = "ruamel.yaml.clib" -version = "0.2.7" -description = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml" -category = "dev" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "safetensors" -version = "0.3.3.post1" -description = "" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -all = ["safetensors", "safetensors", "safetensors", "safetensors", "safetensors", "safetensors", "safetensors"] -dev = ["safetensors"] -jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors"] -numpy = ["numpy (>=1.21.6)"] -paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors"] -pinned-tf = ["safetensors", "tensorflow (==2.11.0)"] -quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] -tensorflow = ["safetensors", "tensorflow (>=2.11.0)"] -testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis 
(>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors", "setuptools_rust (>=1.5.2)"] -torch = ["safetensors", "torch (>=1.10)"] - -[[package]] -name = "safety" -version = "2.3.5" -description = "Checks installed dependencies for known vulnerabilities and licenses." -category = "dev" -optional = false -python-versions = "*" - -[package.dependencies] -Click = ">=8.0.2" -dparse = ">=0.6.2" -packaging = ">=21.0,<22.0" -requests = "*" -"ruamel.yaml" = ">=0.17.21" - -[package.extras] -github = ["jinja2 (>=3.1.0)", "pygithub (>=1.43.3)"] -gitlab = ["python-gitlab (>=1.3.0)"] - -[[package]] -name = "sh" -version = "2.0.6" -description = "Python subprocess replacement" -category = "dev" -optional = false -python-versions = ">=3.8.1,<4.0" - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "smmap" -version = "5.0.0" -description = "A pure Python implementation of a sliding window memory map manager" -category = "dev" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "snowballstemmer" -version = "2.2.0" -description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "soupsieve" -version = "2.5" -description = "A modern CSS selector implementation for Beautiful Soup." -category = "dev" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "sphinx" -version = "7.2.5" -description = "Python documentation generator" -category = "dev" -optional = false -python-versions = ">=3.9" - -[package.dependencies] -alabaster = ">=0.7,<0.8" -babel = ">=2.9" -colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} -docutils = ">=0.18.1,<0.21" -imagesize = ">=1.3" -importlib-metadata = {version = ">=4.8", markers = "python_version < \"3.10\""} -Jinja2 = ">=3.0" -packaging = ">=21.0" -Pygments = ">=2.14" -requests = ">=2.25.0" -snowballstemmer = ">=2.0" -sphinxcontrib-applehelp = "*" -sphinxcontrib-devhelp = "*" -sphinxcontrib-htmlhelp = ">=2.0.0" -sphinxcontrib-jsmath = "*" -sphinxcontrib-qthelp = "*" -sphinxcontrib-serializinghtml = ">=1.1.9" - -[package.extras] -docs = ["sphinxcontrib-websupport"] -lint = ["docutils-stubs", "flake8 (>=3.5.0)", "flake8-simplify", "isort", "mypy (>=0.990)", "ruff", "sphinx-lint", "types-requests"] -test = ["cython (>=3.0)", "filelock", "html5lib", "pytest (>=4.6)", "setuptools (>=67.0)"] - -[[package]] -name = "sphinx-autobuild" -version = "2021.3.14" -description = "Rebuild Sphinx documentation on changes, with live-reload in the browser." -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -colorama = "*" -livereload = "*" -sphinx = "*" - -[package.extras] -test = ["pytest", "pytest-cov"] - -[[package]] -name = "sphinx-basic-ng" -version = "1.0.0b2" -description = "A modern skeleton for Sphinx themes." 
-category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -sphinx = ">=4.0" - -[package.extras] -docs = ["furo", "ipython", "myst-parser", "sphinx-copybutton", "sphinx-inline-tabs"] - -[[package]] -name = "sphinx-click" -version = "5.0.1" -description = "Sphinx extension that automatically documents click applications" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -click = ">=7.0" -docutils = "*" -sphinx = ">=2.0" - -[[package]] -name = "sphinxcontrib-applehelp" -version = "1.0.7" -description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" -category = "dev" -optional = false -python-versions = ">=3.9" - -[package.dependencies] -Sphinx = ">=5" - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-devhelp" -version = "1.0.5" -description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents" -category = "dev" -optional = false -python-versions = ">=3.9" - -[package.dependencies] -Sphinx = ">=5" - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.0.4" -description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" -category = "dev" -optional = false -python-versions = ">=3.9" - -[package.dependencies] -Sphinx = ">=5" - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["html5lib", "pytest"] - -[[package]] -name = "sphinxcontrib-jsmath" -version = "1.0.1" -description = "A sphinx extension which renders display math in HTML via JavaScript" -category = "dev" -optional = false -python-versions = ">=3.5" - -[package.extras] -test = ["flake8", "mypy", "pytest"] - -[[package]] -name = "sphinxcontrib-qthelp" -version = "1.0.6" -description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents" -category = "dev" -optional = false -python-versions = ">=3.9" - -[package.dependencies] -Sphinx = ">=5" - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "1.1.9" -description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" -category = "dev" -optional = false -python-versions = ">=3.9" - -[package.dependencies] -Sphinx = ">=5" - -[package.extras] -lint = ["docutils-stubs", "flake8", "mypy"] -test = ["pytest"] - -[[package]] -name = "stevedore" -version = "5.1.0" -description = "Manage dynamic plugins for Python applications" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -pbr = ">=2.0.0,<2.1.0 || >2.1.0" - -[[package]] -name = "sympy" -version = "1.12" -description = "Computer algebra system (CAS) in Python" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -mpmath = ">=0.19" - -[[package]] -name = "tokenize-rt" -version = "5.2.0" -description = "A wrapper around the stdlib `tokenize` which roundtrips." 
-category = "dev" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "tokenizers" -version = "0.13.3" -description = "Fast and Customizable Tokenizers" -category = "main" -optional = false -python-versions = "*" - -[package.extras] -dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] -docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] -testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] - -[[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "tomlkit" -version = "0.12.1" -description = "Style preserving TOML library" -category = "dev" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "torch" -version = "2.0.1" -description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -category = "main" -optional = false -python-versions = ">=3.8.0" - -[package.dependencies] -filelock = "*" -jinja2 = "*" -networkx = "*" -sympy = "*" -typing-extensions = "*" - -[package.extras] -opt-einsum = ["opt-einsum (>=3.3)"] - -[[package]] -name = "torchmetrics" -version = "1.1.1" -description = "PyTorch native Metrics" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -lightning-utilities = ">=0.8.0" -numpy = ">1.20.0" -torch = ">=1.8.1" - -[package.extras] -all = ["SciencePlots (>=2.0.0)", "lpips (<=0.1.4)", "matplotlib (>=3.2.0)", "mypy (==1.5.1)", "nltk (>=3.6)", "piq (<=0.8.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "regex (>=2021.9.24)", "scipy (>1.0.0)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-emoji", "types-protobuf", "types-pyyaml", "types-requests", "types-setuptools", "types-six", "types-tabulate"] -audio = ["pystoi (>=0.3.0)", "torchaudio (>=0.10.0)"] -detection = ["pycocotools (>2.0.0)", "torchvision (>=0.8)"] -dev = ["SciencePlots (>=2.0.0)", "bert-score (==0.3.13)", "cloudpickle (>1.3)", "coverage (==7.3.0)", "dython (<=0.7.4)", "fairlearn", "fast-bss-eval (>=0.1.0)", "fire (<=0.5.0)", "huggingface-hub (<0.16)", "jiwer (>=2.3.0)", "kornia (>=0.6.7)", "lpips (<=0.1.4)", "matplotlib (>=3.2.0)", "mir-eval (>=0.6)", "mypy (==1.5.1)", "netcal (>1.0.0)", "nltk (>=3.6)", "numpy (<1.25.0)", "pandas (>1.0.0)", "pandas (>=1.4.0)", "phmdoctest (==1.4.0)", "piq (<=0.8.0)", "psutil (<=5.9.5)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-doctestplus (==1.0.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "pytorch-msssim (==1.0.0)", "regex (>=2021.9.24)", "requests (<=2.31.0)", "rouge-score (>0.1.0)", "sacrebleu (>=2.0.0)", "scikit-image (>=0.19.0)", "scikit-learn (>=1.1.1)", "scipy (>1.0.0)", "sewar (>=0.4.4)", "statsmodels (>0.13.5)", "torch-complex (<=0.4.3)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-emoji", "types-protobuf", "types-pyyaml", "types-requests", "types-setuptools", "types-six", "types-tabulate"] -image = ["lpips (<=0.1.4)", "scipy (>1.0.0)", "torch-fidelity (<=0.4.0)", "torchvision (>=0.8)"] -multimodal = ["piq (<=0.8.0)", "transformers (>=4.10.0)"] -test = ["bert-score (==0.3.13)", "cloudpickle (>1.3)", "coverage (==7.3.0)", "dython (<=0.7.4)", "fairlearn", "fast-bss-eval (>=0.1.0)", "fire (<=0.5.0)", "huggingface-hub (<0.16)", "jiwer 
(>=2.3.0)", "kornia (>=0.6.7)", "mir-eval (>=0.6)", "netcal (>1.0.0)", "numpy (<1.25.0)", "pandas (>1.0.0)", "pandas (>=1.4.0)", "phmdoctest (==1.4.0)", "psutil (<=5.9.5)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-doctestplus (==1.0.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "pytorch-msssim (==1.0.0)", "requests (<=2.31.0)", "rouge-score (>0.1.0)", "sacrebleu (>=2.0.0)", "scikit-image (>=0.19.0)", "scikit-learn (>=1.1.1)", "scipy (>1.0.0)", "sewar (>=0.4.4)", "statsmodels (>0.13.5)", "torch-complex (<=0.4.3)"] -text = ["nltk (>=3.6)", "regex (>=2021.9.24)", "tqdm (>=4.41.0)", "transformers (>4.4.0)"] -typing = ["mypy (==1.5.1)", "types-emoji", "types-protobuf", "types-pyyaml", "types-requests", "types-setuptools", "types-six", "types-tabulate"] -visual = ["SciencePlots (>=2.0.0)", "matplotlib (>=3.2.0)"] - -[[package]] -name = "tornado" -version = "6.3.3" -description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -category = "dev" -optional = false -python-versions = ">= 3.8" - -[[package]] -name = "tqdm" -version = "4.66.1" -description = "Fast, Extensible Progress Meter" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] -notebook = ["ipywidgets (>=6)"] -slack = ["slack-sdk"] -telegram = ["requests"] - -[[package]] -name = "transformers" -version = "4.33.1" -description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" -category = "main" -optional = false -python-versions = ">=3.8.0" - -[package.dependencies] -filelock = "*" -huggingface-hub = ">=0.15.1,<1.0" -numpy = ">=1.17" -packaging = ">=20.0" -pyyaml = ">=5.1" -regex = "!=2019.12.17" -requests = "*" -safetensors = ">=0.3.1" -tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.14" -tqdm = ">=4.27" - -[package.extras] -accelerate = ["accelerate (>=0.20.3)"] -agents = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.10,!=1.12.0)"] -all = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] -audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] -codecarbon = ["codecarbon (==1.2.0)"] -deepspeed = ["accelerate (>=0.20.3)", "deepspeed (>=0.9.3)"] -deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "Pillow 
(<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] -dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "urllib3 (<2.0.0)"] -dev-torch = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] -docs = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray", "sentencepiece (>=0.1.91,!=0.1.92)", 
"sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] -docs_specific = ["hf-doc-builder"] -fairscale = ["fairscale (>0.3)"] -flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"] -flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] -ftfy = ["ftfy"] -integrations = ["optuna", "ray", "sigopt"] -ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] -modelcreation = ["cookiecutter (==1.7.3)"] -natten = ["natten (>=0.14.6)"] -onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] -onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] -optuna = ["optuna"] -quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241,<=0.0.259)", "urllib3 (<2.0.0)"] -ray = ["ray"] -retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] -sagemaker = ["sagemaker (>=2.31.0)"] -sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] -serving = ["fastapi", "pydantic (<2)", "starlette", "uvicorn"] -sigopt = ["sigopt"] -sklearn = ["scikit-learn"] -speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] -testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "timeout-decorator"] -tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"] -tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"] -tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] -timm = ["timm"] -tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.14)"] -torch = ["accelerate (>=0.20.3)", "torch (>=1.10,!=1.12.0)"] -torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] -torch-vision = ["Pillow (<10.0.0)", "torchvision"] -torchhub = ["filelock", "huggingface-hub (>=0.15.1,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "tqdm (>=4.27)"] -video = ["av (==9.2.0)", "decord (==0.6.0)"] -vision = ["Pillow (<10.0.0)"] - -[[package]] -name = "typeguard" -version = "4.1.3" -description = "Run-time type checker for Python" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} -typing-extensions = {version = ">=4.7.0", markers = "python_version < \"3.12\""} - -[package.extras] -doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)"] -test = ["coverage[toml] (>=7)", "mypy (>=1.2.0)", "pytest (>=7)"] - -[[package]] -name = "types-requests" -version = "2.31.0.2" -description = "Typing stubs for requests" -category = "dev" -optional = false 
-python-versions = "*" - -[package.dependencies] -types-urllib3 = "*" - -[[package]] -name = "types-urllib3" -version = "1.26.25.14" -description = "Typing stubs for urllib3" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "typing-extensions" -version = "4.7.1" -description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "tzdata" -version = "2023.3" -description = "Provider of IANA time zone data" -category = "main" -optional = false -python-versions = ">=2" - -[[package]] -name = "urllib3" -version = "2.0.4" -description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - -[[package]] -name = "virtualenv" -version = "20.24.4" -description = "Virtual Python Environment builder" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -distlib = ">=0.3.7,<1" -filelock = ">=3.12.2,<4" -platformdirs = ">=3.9.1,<4" - -[package.extras] -docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] - -[[package]] -name = "xdoctest" -version = "1.1.1" -description = "A rewrite of the builtin doctest module" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -colorama = {version = "*", optional = true, markers = "platform_system == \"Windows\" and extra == \"colors\""} -Pygments = {version = "*", optional = true, markers = "python_version >= \"3.5.0\" and extra == \"colors\""} -six = "*" - -[package.extras] -all = ["attrs", "codecov", "colorama", "debugpy", "debugpy", "debugpy", "debugpy", "debugpy", "ipykernel", "ipykernel", "ipython", "ipython", "ipython-genutils", "jedi", "jinja2", "jupyter-client", "jupyter-client", "jupyter-core", "nbconvert", "pyflakes", "pygments", "pygments", "pytest", "pytest", "pytest", "pytest-cov", "six", "tomli", "typing"] -all-strict = ["IPython (==7.10.0)", "IPython (==7.23.1)", "Pygments (==2.0.0)", "Pygments (==2.4.1)", "attrs (==19.2.0)", "codecov (==2.0.15)", "colorama (==0.4.1)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.3.0)", "debugpy (==1.6.0)", "ipykernel (==5.2.0)", "ipykernel (==6.0.0)", "ipython-genutils (==0.2.0)", "jedi (==0.16)", "jinja2 (==3.0.0)", "jupyter-client (==6.1.5)", "jupyter-client (==7.0.0)", "jupyter-core (==4.7.0)", "nbconvert (==6.0.0)", "pyflakes (==2.2.0)", "pytest (==4.6.0)", "pytest (==4.6.0)", "pytest (==6.2.5)", "pytest-cov (==3.0.0)", "six (==1.11.0)", "tomli (==0.2.0)", "typing (==3.7.4)"] -colors = ["colorama", "pygments", "pygments"] -jupyter = ["attrs", "debugpy", "debugpy", "debugpy", "debugpy", "debugpy", "ipykernel", "ipykernel", "ipython", "ipython", "ipython-genutils", "jedi", "jinja2", "jupyter-client", "jupyter-client", 
"jupyter-core", "nbconvert"] -optional = ["attrs", "colorama", "debugpy", "debugpy", "debugpy", "debugpy", "debugpy", "ipykernel", "ipykernel", "ipython", "ipython", "ipython-genutils", "jedi", "jinja2", "jupyter-client", "jupyter-client", "jupyter-core", "nbconvert", "pyflakes", "pygments", "pygments", "tomli"] -optional-strict = ["IPython (==7.10.0)", "IPython (==7.23.1)", "Pygments (==2.0.0)", "Pygments (==2.4.1)", "attrs (==19.2.0)", "colorama (==0.4.1)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.3.0)", "debugpy (==1.6.0)", "ipykernel (==5.2.0)", "ipykernel (==6.0.0)", "ipython-genutils (==0.2.0)", "jedi (==0.16)", "jinja2 (==3.0.0)", "jupyter-client (==6.1.5)", "jupyter-client (==7.0.0)", "jupyter-core (==4.7.0)", "nbconvert (==6.0.0)", "pyflakes (==2.2.0)", "tomli (==0.2.0)"] -runtime-strict = ["six (==1.11.0)"] -tests = ["codecov", "pytest", "pytest", "pytest", "pytest-cov", "typing"] -tests-binary = ["cmake", "cmake", "ninja", "ninja", "pybind11", "pybind11", "scikit-build", "scikit-build"] -tests-binary-strict = ["cmake (==3.21.2)", "cmake (==3.25.0)", "ninja (==1.10.2)", "ninja (==1.11.1)", "pybind11 (==2.10.3)", "pybind11 (==2.7.1)", "scikit-build (==0.11.1)", "scikit-build (==0.16.1)"] -tests-strict = ["codecov (==2.0.15)", "pytest (==4.6.0)", "pytest (==4.6.0)", "pytest (==6.2.5)", "pytest-cov (==3.0.0)", "typing (==3.7.4)"] - -[[package]] -name = "xxhash" -version = "3.3.0" -description = "Python binding for xxHash" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "yarl" -version = "1.9.2" -description = "Yet another URL library" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -idna = ">=2.0" -multidict = ">=4.0" - -[[package]] -name = "zipp" -version = "3.16.2" -description = "Backport of pathlib-compatible object wrapper for zip files" -category = "dev" -optional = false -python-versions = ">=3.8" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-o", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] - -[metadata] -lock-version = "1.1" -python-versions = "^3.9" -content-hash = "1dc12d9e2b71c8a7b78a56aaa355ae96fad4f3b36458a61b320b95189d160dc8" - -[metadata.files] -absl-py = [ - {file = "absl-py-1.4.0.tar.gz", hash = "sha256:d2c244d01048ba476e7c080bd2c6df5e141d211de80223460d5b3b8a2a58433d"}, - {file = "absl_py-1.4.0-py3-none-any.whl", hash = "sha256:0d3fe606adfa4f7db64792dd4c7aee4ee0c38ab75dfd353b7a83ed3e957fcb47"}, -] -aiohttp = [ - {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a94159871304770da4dd371f4291b20cac04e8c94f11bdea1c3478e557fbe0d8"}, - {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13bf85afc99ce6f9ee3567b04501f18f9f8dbbb2ea11ed1a2e079670403a7c84"}, - {file = "aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ce2ac5708501afc4847221a521f7e4b245abf5178cf5ddae9d5b3856ddb2f3a"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96943e5dcc37a6529d18766597c491798b7eb7a61d48878611298afc1fca946c"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:2ad5c3c4590bb3cc28b4382f031f3783f25ec223557124c68754a2231d989e2b"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c413c633d0512df4dc7fd2373ec06cc6a815b7b6d6c2f208ada7e9e93a5061d"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df72ac063b97837a80d80dec8d54c241af059cc9bb42c4de68bd5b61ceb37caa"}, - {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c48c5c0271149cfe467c0ff8eb941279fd6e3f65c9a388c984e0e6cf57538e14"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:368a42363c4d70ab52c2c6420a57f190ed3dfaca6a1b19afda8165ee16416a82"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7607ec3ce4993464368505888af5beb446845a014bc676d349efec0e05085905"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0d21c684808288a98914e5aaf2a7c6a3179d4df11d249799c32d1808e79503b5"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:312fcfbacc7880a8da0ae8b6abc6cc7d752e9caa0051a53d217a650b25e9a691"}, - {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad093e823df03bb3fd37e7dec9d4670c34f9e24aeace76808fc20a507cace825"}, - {file = "aiohttp-3.8.5-cp310-cp310-win32.whl", hash = "sha256:33279701c04351a2914e1100b62b2a7fdb9a25995c4a104259f9a5ead7ed4802"}, - {file = "aiohttp-3.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:6e4a280e4b975a2e7745573e3fc9c9ba0d1194a3738ce1cbaa80626cc9b4f4df"}, - {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae871a964e1987a943d83d6709d20ec6103ca1eaf52f7e0d36ee1b5bebb8b9b9"}, - {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:461908b2578955045efde733719d62f2b649c404189a09a632d245b445c9c975"}, - {file = "aiohttp-3.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72a860c215e26192379f57cae5ab12b168b75db8271f111019509a1196dfc780"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc14be025665dba6202b6a71cfcdb53210cc498e50068bc088076624471f8bb9"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8af740fc2711ad85f1a5c034a435782fbd5b5f8314c9a3ef071424a8158d7f6b"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:841cd8233cbd2111a0ef0a522ce016357c5e3aff8a8ce92bcfa14cef890d698f"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed1c46fb119f1b59304b5ec89f834f07124cd23ae5b74288e364477641060ff"}, - {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84f8ae3e09a34f35c18fa57f015cc394bd1389bce02503fb30c394d04ee6b938"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62360cb771707cb70a6fd114b9871d20d7dd2163a0feafe43fd115cfe4fe845e"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:23fb25a9f0a1ca1f24c0a371523546366bb642397c94ab45ad3aedf2941cec6a"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0ba0d15164eae3d878260d4c4df859bbdc6466e9e6689c344a13334f988bb53"}, - {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5d20003b635fc6ae3f96d7260281dfaf1894fc3aa24d1888a9b2628e97c241e5"}, - {file = 
"aiohttp-3.8.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0175d745d9e85c40dcc51c8f88c74bfbaef9e7afeeeb9d03c37977270303064c"}, - {file = "aiohttp-3.8.5-cp311-cp311-win32.whl", hash = "sha256:2e1b1e51b0774408f091d268648e3d57f7260c1682e7d3a63cb00d22d71bb945"}, - {file = "aiohttp-3.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:043d2299f6dfdc92f0ac5e995dfc56668e1587cea7f9aa9d8a78a1b6554e5755"}, - {file = "aiohttp-3.8.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cae533195e8122584ec87531d6df000ad07737eaa3c81209e85c928854d2195c"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f21e83f355643c345177a5d1d8079f9f28b5133bcd154193b799d380331d5d3"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a75ef35f2df54ad55dbf4b73fe1da96f370e51b10c91f08b19603c64004acc"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e2e9839e14dd5308ee773c97115f1e0a1cb1d75cbeeee9f33824fa5144c7634"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44e65da1de4403d0576473e2344828ef9c4c6244d65cf4b75549bb46d40b8dd"}, - {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d847e4cde6ecc19125ccbc9bfac4a7ab37c234dd88fbb3c5c524e8e14da543"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:c7a815258e5895d8900aec4454f38dca9aed71085f227537208057853f9d13f2"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:8b929b9bd7cd7c3939f8bcfffa92fae7480bd1aa425279d51a89327d600c704d"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5db3a5b833764280ed7618393832e0853e40f3d3e9aa128ac0ba0f8278d08649"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:a0215ce6041d501f3155dc219712bc41252d0ab76474615b9700d63d4d9292af"}, - {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:fd1ed388ea7fbed22c4968dd64bab0198de60750a25fe8c0c9d4bef5abe13824"}, - {file = "aiohttp-3.8.5-cp36-cp36m-win32.whl", hash = "sha256:6e6783bcc45f397fdebc118d772103d751b54cddf5b60fbcc958382d7dd64f3e"}, - {file = "aiohttp-3.8.5-cp36-cp36m-win_amd64.whl", hash = "sha256:b5411d82cddd212644cf9360879eb5080f0d5f7d809d03262c50dad02f01421a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:01d4c0c874aa4ddfb8098e85d10b5e875a70adc63db91f1ae65a4b04d3344cda"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5980a746d547a6ba173fd5ee85ce9077e72d118758db05d229044b469d9029a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a482e6da906d5e6e653be079b29bc173a48e381600161c9932d89dfae5942ef"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80bd372b8d0715c66c974cf57fe363621a02f359f1ec81cba97366948c7fc873"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1161b345c0a444ebcf46bf0a740ba5dcf50612fd3d0528883fdc0eff578006a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd56db019015b6acfaaf92e1ac40eb8434847d9bf88b4be4efe5bfd260aee692"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:153c2549f6c004d2754cc60603d4668899c9895b8a89397444a9c4efa282aaf4"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4a01951fabc4ce26ab791da5f3f24dca6d9a6f24121746eb19756416ff2d881b"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bfb9162dcf01f615462b995a516ba03e769de0789de1cadc0f916265c257e5d8"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:7dde0009408969a43b04c16cbbe252c4f5ef4574ac226bc8815cd7342d2028b6"}, - {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4149d34c32f9638f38f544b3977a4c24052042affa895352d3636fa8bffd030a"}, - {file = "aiohttp-3.8.5-cp37-cp37m-win32.whl", hash = "sha256:68c5a82c8779bdfc6367c967a4a1b2aa52cd3595388bf5961a62158ee8a59e22"}, - {file = "aiohttp-3.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2cf57fb50be5f52bda004b8893e63b48530ed9f0d6c96c84620dc92fe3cd9b9d"}, - {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eca4bf3734c541dc4f374ad6010a68ff6c6748f00451707f39857f429ca36ced"}, - {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1274477e4c71ce8cfe6c1ec2f806d57c015ebf84d83373676036e256bc55d690"}, - {file = "aiohttp-3.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c543e54710d6158fc6f439296c7865b29e0b616629767e685a7185fab4a6b9"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:910bec0c49637d213f5d9877105d26e0c4a4de2f8b1b29405ff37e9fc0ad52b8"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5443910d662db951b2e58eb70b0fbe6b6e2ae613477129a5805d0b66c54b6cb7"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e460be6978fc24e3df83193dc0cc4de46c9909ed92dd47d349a452ef49325b7"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb1558def481d84f03b45888473fc5a1f35747b5f334ef4e7a571bc0dfcb11f8"}, - {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34dd0c107799dcbbf7d48b53be761a013c0adf5571bf50c4ecad5643fe9cfcd0"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aa1990247f02a54185dc0dff92a6904521172a22664c863a03ff64c42f9b5410"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0e584a10f204a617d71d359fe383406305a4b595b333721fa50b867b4a0a1548"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a3cf433f127efa43fee6b90ea4c6edf6c4a17109d1d037d1a52abec84d8f2e42"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c11f5b099adafb18e65c2c997d57108b5bbeaa9eeee64a84302c0978b1ec948b"}, - {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:84de26ddf621d7ac4c975dbea4c945860e08cccde492269db4e1538a6a6f3c35"}, - {file = "aiohttp-3.8.5-cp38-cp38-win32.whl", hash = "sha256:ab88bafedc57dd0aab55fa728ea10c1911f7e4d8b43e1d838a1739f33712921c"}, - {file = "aiohttp-3.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:5798a9aad1879f626589f3df0f8b79b3608a92e9beab10e5fda02c8a2c60db2e"}, - {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a6ce61195c6a19c785df04e71a4537e29eaa2c50fe745b732aa937c0c77169f3"}, - {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:773dd01706d4db536335fcfae6ea2440a70ceb03dd3e7378f3e815b03c97ab51"}, - {file = 
"aiohttp-3.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f83a552443a526ea38d064588613aca983d0ee0038801bc93c0c916428310c28"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f7372f7341fcc16f57b2caded43e81ddd18df53320b6f9f042acad41f8e049a"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea353162f249c8097ea63c2169dd1aa55de1e8fecbe63412a9bc50816e87b761"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d47ae48db0b2dcf70bc8a3bc72b3de86e2a590fc299fdbbb15af320d2659de"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d827176898a2b0b09694fbd1088c7a31836d1a505c243811c87ae53a3f6273c1"}, - {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3562b06567c06439d8b447037bb655ef69786c590b1de86c7ab81efe1c9c15d8"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4e874cbf8caf8959d2adf572a78bba17cb0e9d7e51bb83d86a3697b686a0ab4d"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6809a00deaf3810e38c628e9a33271892f815b853605a936e2e9e5129762356c"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:33776e945d89b29251b33a7e7d006ce86447b2cfd66db5e5ded4e5cd0340585c"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eaeed7abfb5d64c539e2db173f63631455f1196c37d9d8d873fc316470dfbacd"}, - {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e91d635961bec2d8f19dfeb41a539eb94bd073f075ca6dae6c8dc0ee89ad6f91"}, - {file = "aiohttp-3.8.5-cp39-cp39-win32.whl", hash = "sha256:00ad4b6f185ec67f3e6562e8a1d2b69660be43070bd0ef6fcec5211154c7df67"}, - {file = "aiohttp-3.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:c0a9034379a37ae42dea7ac1e048352d96286626251862e448933c0f59cbd79c"}, - {file = "aiohttp-3.8.5.tar.gz", hash = "sha256:b9552ec52cc147dbf1944ac7ac98af7602e51ea2dcd076ed194ca3c0d1c7d0bc"}, -] -aiosignal = [ - {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, - {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, -] -alabaster = [ - {file = "alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3"}, - {file = "alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2"}, -] -argcomplete = [ - {file = "argcomplete-3.1.1-py3-none-any.whl", hash = "sha256:35fa893a88deea85ea7b20d241100e64516d6af6d7b0ae2bed1d263d26f70948"}, - {file = "argcomplete-3.1.1.tar.gz", hash = "sha256:6c4c563f14f01440aaffa3eae13441c5db2357b5eec639abe7c0b15334627dff"}, -] -async-timeout = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, -] -attrs = [ - {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, - {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, -] -babel = [ - {file = "Babel-2.12.1-py3-none-any.whl", hash = 
"sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610"}, - {file = "Babel-2.12.1.tar.gz", hash = "sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455"}, -] -bandit = [ - {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"}, - {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"}, -] -beautifulsoup4 = [ - {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, - {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, -] -black = [ - {file = "black-22.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eedd20838bd5d75b80c9f5487dbcb06836a43833a37846cf1d8c1cc01cef59d"}, - {file = "black-22.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:159a46a4947f73387b4d83e87ea006dbb2337eab6c879620a3ba52699b1f4351"}, - {file = "black-22.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d30b212bffeb1e252b31dd269dfae69dd17e06d92b87ad26e23890f3efea366f"}, - {file = "black-22.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:7412e75863aa5c5411886804678b7d083c7c28421210180d67dfd8cf1221e1f4"}, - {file = "black-22.12.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c116eed0efb9ff870ded8b62fe9f28dd61ef6e9ddd28d83d7d264a38417dcee2"}, - {file = "black-22.12.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1f58cbe16dfe8c12b7434e50ff889fa479072096d79f0a7f25e4ab8e94cd8350"}, - {file = "black-22.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77d86c9f3db9b1bf6761244bc0b3572a546f5fe37917a044e02f3166d5aafa7d"}, - {file = "black-22.12.0-cp38-cp38-win_amd64.whl", hash = "sha256:82d9fe8fee3401e02e79767016b4907820a7dc28d70d137eb397b92ef3cc5bfc"}, - {file = "black-22.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101c69b23df9b44247bd88e1d7e90154336ac4992502d4197bdac35dd7ee3320"}, - {file = "black-22.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:559c7a1ba9a006226f09e4916060982fd27334ae1998e7a38b3f33a37f7a2148"}, - {file = "black-22.12.0-py3-none-any.whl", hash = "sha256:436cc9167dd28040ad90d3b404aec22cedf24a6e4d7de221bec2730ec0c97bcf"}, - {file = "black-22.12.0.tar.gz", hash = "sha256:229351e5a18ca30f447bf724d007f890f97e13af070bb6ad4c0a441cd7596a2f"}, -] -certifi = [ - {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, - {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, -] -cfgv = [ - {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, - {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, -] -charset-normalizer = [ - {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, - {file = 
"charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, - {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, - {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, - {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, - {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, - {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, - {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, -] -click = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, -] -colorama = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] -colorlog = [ - {file = "colorlog-6.7.0-py2.py3-none-any.whl", hash = "sha256:0d33ca236784a1ba3ff9c532d4964126d8a2c44f1f0cb1d2b0728196f512f662"}, - {file = "colorlog-6.7.0.tar.gz", hash = "sha256:bd94bd21c1e13fac7bd3153f4bc3a7dc0eb0974b8bc2fdf1a989e474f6e582e5"}, -] -coverage = [ - {file = "coverage-7.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cd0f7429ecfd1ff597389907045ff209c8fdb5b013d38cfa7c60728cb484b6e3"}, - {file = "coverage-7.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:966f10df9b2b2115da87f50f6a248e313c72a668248be1b9060ce935c871f276"}, - {file = "coverage-7.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0575c37e207bb9b98b6cf72fdaaa18ac909fb3d153083400c2d48e2e6d28bd8e"}, - {file = "coverage-7.3.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:245c5a99254e83875c7fed8b8b2536f040997a9b76ac4c1da5bff398c06e860f"}, - {file = "coverage-7.3.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c96dd7798d83b960afc6c1feb9e5af537fc4908852ef025600374ff1a017392"}, - {file = "coverage-7.3.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:de30c1aa80f30af0f6b2058a91505ea6e36d6535d437520067f525f7df123887"}, - {file = "coverage-7.3.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:50dd1e2dd13dbbd856ffef69196781edff26c800a74f070d3b3e3389cab2600d"}, - {file = "coverage-7.3.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9c0c19f70d30219113b18fe07e372b244fb2a773d4afde29d5a2f7930765136"}, - {file = "coverage-7.3.1-cp310-cp310-win32.whl", hash = "sha256:770f143980cc16eb601ccfd571846e89a5fe4c03b4193f2e485268f224ab602f"}, - {file = 
"coverage-7.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:cdd088c00c39a27cfa5329349cc763a48761fdc785879220d54eb785c8a38520"}, - {file = "coverage-7.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:74bb470399dc1989b535cb41f5ca7ab2af561e40def22d7e188e0a445e7639e3"}, - {file = "coverage-7.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:025ded371f1ca280c035d91b43252adbb04d2aea4c7105252d3cbc227f03b375"}, - {file = "coverage-7.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6191b3a6ad3e09b6cfd75b45c6aeeffe7e3b0ad46b268345d159b8df8d835f9"}, - {file = "coverage-7.3.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7eb0b188f30e41ddd659a529e385470aa6782f3b412f860ce22b2491c89b8593"}, - {file = "coverage-7.3.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c8f0df9dfd8ff745bccff75867d63ef336e57cc22b2908ee725cc552689ec8"}, - {file = "coverage-7.3.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7eb3cd48d54b9bd0e73026dedce44773214064be93611deab0b6a43158c3d5a0"}, - {file = "coverage-7.3.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ac3c5b7e75acac31e490b7851595212ed951889918d398b7afa12736c85e13ce"}, - {file = "coverage-7.3.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5b4ee7080878077af0afa7238df1b967f00dc10763f6e1b66f5cced4abebb0a3"}, - {file = "coverage-7.3.1-cp311-cp311-win32.whl", hash = "sha256:229c0dd2ccf956bf5aeede7e3131ca48b65beacde2029f0361b54bf93d36f45a"}, - {file = "coverage-7.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c6f55d38818ca9596dc9019eae19a47410d5322408140d9a0076001a3dcb938c"}, - {file = "coverage-7.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5289490dd1c3bb86de4730a92261ae66ea8d44b79ed3cc26464f4c2cde581fbc"}, - {file = "coverage-7.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ca833941ec701fda15414be400c3259479bfde7ae6d806b69e63b3dc423b1832"}, - {file = "coverage-7.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd694e19c031733e446c8024dedd12a00cda87e1c10bd7b8539a87963685e969"}, - {file = "coverage-7.3.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aab8e9464c00da5cb9c536150b7fbcd8850d376d1151741dd0d16dfe1ba4fd26"}, - {file = "coverage-7.3.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87d38444efffd5b056fcc026c1e8d862191881143c3aa80bb11fcf9dca9ae204"}, - {file = "coverage-7.3.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8a07b692129b8a14ad7a37941a3029c291254feb7a4237f245cfae2de78de037"}, - {file = "coverage-7.3.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2829c65c8faaf55b868ed7af3c7477b76b1c6ebeee99a28f59a2cb5907a45760"}, - {file = "coverage-7.3.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1f111a7d85658ea52ffad7084088277135ec5f368457275fc57f11cebb15607f"}, - {file = "coverage-7.3.1-cp312-cp312-win32.whl", hash = "sha256:c397c70cd20f6df7d2a52283857af622d5f23300c4ca8e5bd8c7a543825baa5a"}, - {file = "coverage-7.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:5ae4c6da8b3d123500f9525b50bf0168023313963e0e2e814badf9000dd6ef92"}, - {file = "coverage-7.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca70466ca3a17460e8fc9cea7123c8cbef5ada4be3140a1ef8f7b63f2f37108f"}, - {file = "coverage-7.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:f2781fd3cabc28278dc982a352f50c81c09a1a500cc2086dc4249853ea96b981"}, - {file = "coverage-7.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6407424621f40205bbe6325686417e5e552f6b2dba3535dd1f90afc88a61d465"}, - {file = "coverage-7.3.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:04312b036580ec505f2b77cbbdfb15137d5efdfade09156961f5277149f5e344"}, - {file = "coverage-7.3.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9ad38204887349853d7c313f53a7b1c210ce138c73859e925bc4e5d8fc18e7"}, - {file = "coverage-7.3.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:53669b79f3d599da95a0afbef039ac0fadbb236532feb042c534fbb81b1a4e40"}, - {file = "coverage-7.3.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:614f1f98b84eb256e4f35e726bfe5ca82349f8dfa576faabf8a49ca09e630086"}, - {file = "coverage-7.3.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f1a317fdf5c122ad642db8a97964733ab7c3cf6009e1a8ae8821089993f175ff"}, - {file = "coverage-7.3.1-cp38-cp38-win32.whl", hash = "sha256:defbbb51121189722420a208957e26e49809feafca6afeef325df66c39c4fdb3"}, - {file = "coverage-7.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:f4f456590eefb6e1b3c9ea6328c1e9fa0f1006e7481179d749b3376fc793478e"}, - {file = "coverage-7.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f12d8b11a54f32688b165fd1a788c408f927b0960984b899be7e4c190ae758f1"}, - {file = "coverage-7.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f09195dda68d94a53123883de75bb97b0e35f5f6f9f3aa5bf6e496da718f0cb6"}, - {file = "coverage-7.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6601a60318f9c3945be6ea0f2a80571f4299b6801716f8a6e4846892737ebe4"}, - {file = "coverage-7.3.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07d156269718670d00a3b06db2288b48527fc5f36859425ff7cec07c6b367745"}, - {file = "coverage-7.3.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:636a8ac0b044cfeccae76a36f3b18264edcc810a76a49884b96dd744613ec0b7"}, - {file = "coverage-7.3.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5d991e13ad2ed3aced177f524e4d670f304c8233edad3210e02c465351f785a0"}, - {file = "coverage-7.3.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:586649ada7cf139445da386ab6f8ef00e6172f11a939fc3b2b7e7c9082052fa0"}, - {file = "coverage-7.3.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4aba512a15a3e1e4fdbfed2f5392ec221434a614cc68100ca99dcad7af29f3f8"}, - {file = "coverage-7.3.1-cp39-cp39-win32.whl", hash = "sha256:6bc6f3f4692d806831c136c5acad5ccedd0262aa44c087c46b7101c77e139140"}, - {file = "coverage-7.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:553d7094cb27db58ea91332e8b5681bac107e7242c23f7629ab1316ee73c4981"}, - {file = "coverage-7.3.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:220eb51f5fb38dfdb7e5d54284ca4d0cd70ddac047d750111a68ab1798945194"}, - {file = "coverage-7.3.1.tar.gz", hash = "sha256:6cb7fe1581deb67b782c153136541e20901aa312ceedaf1467dcb35255787952"}, -] -darglint = [ - {file = "darglint-1.8.1-py3-none-any.whl", hash = "sha256:5ae11c259c17b0701618a20c3da343a3eb98b3bc4b5a83d31cdd94f5ebdced8d"}, - {file = "darglint-1.8.1.tar.gz", hash = "sha256:080d5106df149b199822e7ee7deb9c012b49891538f14a11be681044f0bb20da"}, -] -datasets = [ - {file = "datasets-2.14.5-py3-none-any.whl", hash = 
"sha256:dd4155091034cba04d5a28711f2ed3944275ed15c5d0c5a2d0b6b9ea34a2bdfe"}, - {file = "datasets-2.14.5.tar.gz", hash = "sha256:b738a86540ab8e1a7806c8a3790b67be0056318d0c5d5a58a1b0dbdd76c0f568"}, -] -dill = [ - {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, - {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, -] -distlib = [ - {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"}, - {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, -] -docutils = [ - {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, - {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, -] -dparse = [ - {file = "dparse-0.6.3-py3-none-any.whl", hash = "sha256:0d8fe18714056ca632d98b24fbfc4e9791d4e47065285ab486182288813a5318"}, - {file = "dparse-0.6.3.tar.gz", hash = "sha256:27bb8b4bcaefec3997697ba3f6e06b2447200ba273c0b085c3d012a04571b528"}, -] -exceptiongroup = [ - {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, - {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, -] -filelock = [ - {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"}, - {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"}, -] -flake8 = [ - {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, - {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, -] -flake8-bandit = [ - {file = "flake8_bandit-4.1.1-py3-none-any.whl", hash = "sha256:4c8a53eb48f23d4ef1e59293657181a3c989d0077c9952717e98a0eace43e06d"}, - {file = "flake8_bandit-4.1.1.tar.gz", hash = "sha256:068e09287189cbfd7f986e92605adea2067630b75380c6b5733dab7d87f9a84e"}, -] -flake8-bugbear = [ - {file = "flake8-bugbear-23.7.10.tar.gz", hash = "sha256:0ebdc7d8ec1ca8bd49347694562381f099f4de2f8ec6bda7a7dca65555d9e0d4"}, - {file = "flake8_bugbear-23.7.10-py3-none-any.whl", hash = "sha256:d99d005114020fbef47ed5e4aebafd22f167f9a0fbd0d8bf3c9e90612cb25c34"}, -] -flake8-docstrings = [ - {file = "flake8_docstrings-1.7.0-py2.py3-none-any.whl", hash = "sha256:51f2344026da083fc084166a9353f5082b01f72901df422f74b4d953ae88ac75"}, - {file = "flake8_docstrings-1.7.0.tar.gz", hash = "sha256:4c8cc748dc16e6869728699e5d0d685da9a10b0ea718e090b1ba088e67a941af"}, -] -flake8-rst-docstrings = [ - {file = "flake8-rst-docstrings-0.3.0.tar.gz", hash = "sha256:d1ce22b4bd37b73cd86b8d980e946ef198cfcc18ed82fedb674ceaa2f8d1afa4"}, - {file = "flake8_rst_docstrings-0.3.0-py3-none-any.whl", hash = "sha256:f8c3c6892ff402292651c31983a38da082480ad3ba253743de52989bdc84ca1c"}, -] -frozenlist = [ - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, - {file = 
"frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, - {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, - {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, - {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, - {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, - {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, - {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, - {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, - {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, - {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, -] -fsspec = [ - {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, - {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, -] -furo = [ - {file = "furo-2023.8.19-py3-none-any.whl", hash = "sha256:12f99f87a1873b6746228cfde18f77244e6c1ffb85d7fed95e638aae70d80590"}, - {file = "furo-2023.8.19.tar.gz", hash = "sha256:e671ee638ab3f1b472f4033b0167f502ab407830e0db0f843b1c1028119c9cd1"}, -] -gitdb = [ - {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, - {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, -] -gitpython = [ - {file = "GitPython-3.1.35-py3-none-any.whl", hash = "sha256:c19b4292d7a1d3c0f653858db273ff8a6614100d1eb1528b014ec97286193c09"}, - {file = "GitPython-3.1.35.tar.gz", hash = "sha256:9cbefbd1789a5fe9bcf621bb34d3f441f3a90c8461d377f84eda73e721d9b06b"}, -] -huggingface-hub = [ - {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:0d3df29932f334fead024afc7cb4cc5149d955238b8b5e42dcf9740d6995a349"}, - {file = "huggingface_hub-0.16.4.tar.gz", hash = 
"sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"}, -] -identify = [ - {file = "identify-2.5.27-py2.py3-none-any.whl", hash = "sha256:fdb527b2dfe24602809b2201e033c2a113d7bdf716db3ca8e3243f735dcecaba"}, - {file = "identify-2.5.27.tar.gz", hash = "sha256:287b75b04a0e22d727bc9a41f0d4f3c1bcada97490fa6eabb5b28f0e9097e733"}, -] -idna = [ - {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, - {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, -] -imagesize = [ - {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, - {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, -] -importlib-metadata = [ - {file = "importlib_metadata-6.8.0-py3-none-any.whl", hash = "sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb"}, - {file = "importlib_metadata-6.8.0.tar.gz", hash = "sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743"}, -] -iniconfig = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, -] -isort = [ - {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, - {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, -] -jinja2 = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, -] -lightning-utilities = [ - {file = "lightning-utilities-0.9.0.tar.gz", hash = "sha256:efbf2c488c257f942abdfd06cf646fb84ca215a9663b60081811e22a15ee033b"}, - {file = "lightning_utilities-0.9.0-py3-none-any.whl", hash = "sha256:918dd90c775719e3855631db6282ad75c14da4c5727c4cebdd1589d865fad03d"}, -] -livereload = [ - {file = "livereload-2.6.3-py2.py3-none-any.whl", hash = "sha256:ad4ac6f53b2d62bb6ce1a5e6e96f1f00976a32348afedcb4b6d68df2a1d346e4"}, - {file = "livereload-2.6.3.tar.gz", hash = "sha256:776f2f865e59fde56490a56bcc6773b6917366bce0c267c60ee8aaf1a0959869"}, -] -markdown-it-py = [ - {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, - {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, -] -markupsafe = [ +files = [ {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, @@ -2249,6 +1028,16 @@ markupsafe = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = 
"sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2280,23 +1069,72 @@ markupsafe = [ {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] -mccabe = [ + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] -mdit-py-plugins = [ + +[[package]] +name = "mdit-py-plugins" +version = "0.4.0" +description = "Collection of plugins for markdown-it-py" +optional = false +python-versions = ">=3.8" +files = [ {file = "mdit_py_plugins-0.4.0-py3-none-any.whl", hash = "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9"}, {file = "mdit_py_plugins-0.4.0.tar.gz", hash = "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b"}, ] -mdurl = [ + +[package.dependencies] +markdown-it-py = ">=1.0.0,<4.0.0" + +[package.extras] +code-style = ["pre-commit"] 
+rtd = ["myst-parser", "sphinx-book-theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] -mpmath = [ + +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +optional = false +python-versions = "*" +files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, ] -multidict = [ + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +optional = false +python-versions = ">=3.7" +files = [ {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, @@ -2372,25 +1210,14 @@ multidict = [ {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, ] -multiprocess = [ - {file = "multiprocess-0.70.15-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:aa36c7ed16f508091438687fe9baa393a7a8e206731d321e443745e743a0d4e5"}, - {file = "multiprocess-0.70.15-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:20e024018c46d0d1602024c613007ac948f9754659e3853b0aa705e83f6931d8"}, - {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:e576062981c91f0fe8a463c3d52506e598dfc51320a8dd8d78b987dfca91c5db"}, - {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:e73f497e6696a0f5433ada2b3d599ae733b87a6e8b008e387c62ac9127add177"}, - {file = "multiprocess-0.70.15-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:73db2e7b32dcc7f9b0f075c2ffa45c90b6729d3f1805f27e88534c8d321a1be5"}, - {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:4271647bd8a49c28ecd6eb56a7fdbd3c212c45529ad5303b40b3c65fc6928e5f"}, - {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cf981fb998d6ec3208cb14f0cf2e9e80216e834f5d51fd09ebc937c32b960902"}, - {file = "multiprocess-0.70.15-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:18f9f2c7063346d1617bd1684fdcae8d33380ae96b99427260f562e1a1228b67"}, - {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:0eac53214d664c49a34695e5824872db4006b1a465edd7459a251809c3773370"}, - {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = 
"sha256:1a51dd34096db47fb21fa2b839e615b051d51b97af9a67afbcdaa67186b44883"}, - {file = "multiprocess-0.70.15-py310-none-any.whl", hash = "sha256:7dd58e33235e83cf09d625e55cffd7b0f0eede7ee9223cdd666a87624f60c21a"}, - {file = "multiprocess-0.70.15-py311-none-any.whl", hash = "sha256:134f89053d82c9ed3b73edd3a2531eb791e602d4f4156fc92a79259590bd9670"}, - {file = "multiprocess-0.70.15-py37-none-any.whl", hash = "sha256:f7d4a1629bccb433114c3b4885f69eccc200994323c80f6feee73b0edc9199c5"}, - {file = "multiprocess-0.70.15-py38-none-any.whl", hash = "sha256:bee9afba476c91f9ebee7beeee0601face9eff67d822e893f9a893725fbd6316"}, - {file = "multiprocess-0.70.15-py39-none-any.whl", hash = "sha256:3e0953f5d52b4c76f1c973eaf8214554d146f2be5decb48e928e55c7a2d19338"}, - {file = "multiprocess-0.70.15.tar.gz", hash = "sha256:f20eed3036c0ef477b07a4177cf7c1ba520d9a2677870a4f47fe026f0cd6787e"}, -] -mypy = [ + +[[package]] +name = "mypy" +version = "1.5.1" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.8" +files = [ {file = "mypy-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f33592ddf9655a4894aef22d134de7393e95fcbdc2d15c1ab65828eee5c66c70"}, {file = "mypy-1.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:258b22210a4a258ccd077426c7a181d789d1121aca6db73a83f79372f5569ae0"}, {file = "mypy-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9ec1f695f0c25986e6f7f8778e5ce61659063268836a38c951200c57479cc12"}, @@ -2419,31 +1246,129 @@ mypy = [ {file = "mypy-1.5.1-py3-none-any.whl", hash = "sha256:f757063a83970d67c444f6e01d9550a7402322af3557ce7630d3c957386fa8f5"}, {file = "mypy-1.5.1.tar.gz", hash = "sha256:b031b9601f1060bf1281feab89697324726ba0c0bae9d7cd7ab4b690940f0b92"}, ] -mypy-extensions = [ + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.1.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] -myst-parser = [ + +[[package]] +name = "myst-parser" +version = "2.0.0" +description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser," +optional = false +python-versions = ">=3.8" +files = [ {file = "myst_parser-2.0.0-py3-none-any.whl", hash = "sha256:7c36344ae39c8e740dad7fdabf5aa6fc4897a813083c6cc9990044eb93656b14"}, {file = "myst_parser-2.0.0.tar.gz", hash = "sha256:ea929a67a6a0b1683cdbe19b8d2e724cd7643f8aa3e7bb18dd65beac3483bead"}, ] -networkx = [ + +[package.dependencies] +docutils = ">=0.16,<0.21" +jinja2 = "*" +markdown-it-py = ">=3.0,<4.0" +mdit-py-plugins = ">=0.4,<1.0" +pyyaml = "*" +sphinx = ">=6,<8" + +[package.extras] +code-style = ["pre-commit (>=3.0,<4.0)"] +linkify = ["linkify-it-py (>=2.0,<3.0)"] +rtd = ["ipython", "pydata-sphinx-theme (==v0.13.0rc4)", "sphinx-autodoc2 (>=0.4.2,<0.5.0)", "sphinx-book-theme (==1.0.0rc2)", "sphinx-copybutton", "sphinx-design2", "sphinx-pyscript", "sphinx-tippy (>=0.3.1)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.8.2,<0.9.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] +testing = ["beautifulsoup4", "coverage[toml]", "pytest (>=7,<8)", "pytest-cov", "pytest-param-files (>=0.3.4,<0.4.0)", "pytest-regressions", "sphinx-pytest"] +testing-docutils = ["pygments", "pytest (>=7,<8)", "pytest-param-files (>=0.3.4,<0.4.0)"] + +[[package]] +name = "networkx" +version = "3.1" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.8" +files = [ {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"}, {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"}, ] -nodeenv = [ + +[package.extras] +default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"] +developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"] +doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"] +test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "nodeenv" +version = "1.8.0" +description = "Node.js virtual environment builder" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +files = [ {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, ] -nox = [ + +[package.dependencies] +setuptools = "*" + +[[package]] +name = "nox" +version = "2023.4.22" +description = "Flexible test automation." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "nox-2023.4.22-py3-none-any.whl", hash = "sha256:0b1adc619c58ab4fa57d6ab2e7823fe47a32e70202f287d78474adcc7bda1891"}, {file = "nox-2023.4.22.tar.gz", hash = "sha256:46c0560b0dc609d7d967dc99e22cb463d3c4caf54a5fda735d6c11b5177e3a9f"}, ] -nox-poetry = [ + +[package.dependencies] +argcomplete = ">=1.9.4,<4.0" +colorlog = ">=2.6.1,<7.0.0" +packaging = ">=20.9" +virtualenv = ">=14" + +[package.extras] +tox-to-nox = ["jinja2", "tox (<4)"] + +[[package]] +name = "nox-poetry" +version = "1.0.3" +description = "nox-poetry" +optional = false +python-versions = ">=3.7,<4.0" +files = [ {file = "nox_poetry-1.0.3-py3-none-any.whl", hash = "sha256:a2fffeb70ae81840479e68287afe1c772bf376f70f1e92f99832a20b3c64d064"}, {file = "nox_poetry-1.0.3.tar.gz", hash = "sha256:dc7ecbbd812a333a0c0b558f57e5b37f7c12926cddbcecaf2264957fd373824e"}, ] -numpy = [ + +[package.dependencies] +nox = ">=2020.8.22" +packaging = ">=20.9" +tomlkit = ">=0.7" + +[[package]] +name = "numpy" +version = "1.25.2" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, @@ -2470,11 +1395,28 @@ numpy = [ {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, ] -packaging = [ + +[[package]] +name = "packaging" +version = "21.3" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.6" +files = [ {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, ] -pandas = [ + +[package.dependencies] +pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" + +[[package]] +name = "pandas" +version = "2.1.0" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ {file = "pandas-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40dd20439ff94f1b2ed55b393ecee9cb6f3b08104c2c40b0cb7186a2f0046242"}, {file = "pandas-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4f38e4fedeba580285eaac7ede4f686c6701a9e618d8a857b138a126d067f2f"}, {file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e6a0fe052cf27ceb29be9429428b4918f3740e37ff185658f40d8702f0b3e09"}, @@ -2495,110 +1437,383 @@ pandas = [ {file = "pandas-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0164b85937707ec7f70b34a6c3a578dbf0f50787f910f21ca3b26a7fd3363437"}, {file = "pandas-2.1.0.tar.gz", hash = "sha256:62c24c7fc59e42b775ce0679cfa7b14a5f9bfb7643cfbe708c960699e05fb918"}, ] -pathspec = [ + +[package.dependencies] +numpy = {version = ">=1.23.2", markers = "python_version >= \"3.11\""} +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 
(>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] +aws = ["s3fs (>=2022.05.0)"] +clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] +compression = ["zstandard (>=0.17.0)"] +computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2022.05.0)"] +gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] +hdf5 = ["tables (>=3.7.0)"] +html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] +mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] +spss = ["pyreadstat (>=1.1.5)"] +sql-other = ["SQLAlchemy (>=1.4.36)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.8.0)"] + +[[package]] +name = "pandas" +version = "2.1.2" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:24057459f19db9ebb02984c6fdd164a970b31a95f38e4a49cf7615b36a1b532c"}, + {file = "pandas-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a6cf8fcc8a63d333970b950a7331a30544cf59b1a97baf0a7409e09eafc1ac38"}, + {file = "pandas-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ae6ffbd9d614c20d028c7117ee911fc4e266b4dca2065d5c5909e401f8ff683"}, + {file = "pandas-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eff794eeb7883c5aefb1ed572e7ff533ae779f6c6277849eab9e77986e352688"}, + {file = "pandas-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02954e285e8e2f4006b6f22be6f0df1f1c3c97adbb7ed211c6b483426f20d5c8"}, + {file = "pandas-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:5b40c9f494e1f27588c369b9e4a6ca19cd924b3a0e1ef9ef1a8e30a07a438f43"}, + {file = "pandas-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:08d287b68fd28906a94564f15118a7ca8c242e50ae7f8bd91130c362b2108a81"}, + {file = "pandas-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bbd98dcdcd32f408947afdb3f7434fade6edd408c3077bbce7bd840d654d92c6"}, + {file = "pandas-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e90c95abb3285d06f6e4feedafc134306a8eced93cb78e08cf50e224d5ce22e2"}, + {file = "pandas-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:52867d69a54e71666cd184b04e839cff7dfc8ed0cd6b936995117fdae8790b69"}, + {file = "pandas-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8d0382645ede2fde352da2a885aac28ec37d38587864c0689b4b2361d17b1d4c"}, + {file = "pandas-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:65177d1c519b55e5b7f094c660ed357bb7d86e799686bb71653b8a4803d8ff0d"}, + {file = "pandas-2.1.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5aa6b86802e8cf7716bf4b4b5a3c99b12d34e9c6a9d06dad254447a620437931"}, + {file = "pandas-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d594e2ce51b8e0b4074e6644758865dc2bb13fd654450c1eae51201260a539f1"}, + {file = "pandas-2.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3223f997b6d2ebf9c010260cf3d889848a93f5d22bb4d14cd32638b3d8bba7ad"}, + {file = "pandas-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4944dc004ca6cc701dfa19afb8bdb26ad36b9bed5bcec617d2a11e9cae6902"}, + {file = "pandas-2.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3f76280ce8ec216dde336e55b2b82e883401cf466da0fe3be317c03fb8ee7c7d"}, + {file = "pandas-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:7ad20d24acf3a0042512b7e8d8fdc2e827126ed519d6bd1ed8e6c14ec8a2c813"}, + {file = "pandas-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:021f09c15e1381e202d95d4a21ece8e7f2bf1388b6d7e9cae09dfe27bd2043d1"}, + {file = "pandas-2.1.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e7f12b2de0060b0b858cfec0016e7d980ae5bae455a1746bfcc70929100ee633"}, + {file = "pandas-2.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83c166b9bb27c1715bed94495d9598a7f02950b4749dba9349c1dd2cbf10729d"}, + {file = "pandas-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25c9976c17311388fcd953cb3d0697999b2205333f4e11e669d90ff8d830d429"}, + {file = "pandas-2.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:851b5afbb0d62f6129ae891b533aa508cc357d5892c240c91933d945fff15731"}, + {file = "pandas-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:e78507adcc730533619de07bfdd1c62b2918a68cd4419ea386e28abf7f6a1e5c"}, + {file = "pandas-2.1.2.tar.gz", hash = "sha256:52897edc2774d2779fbeb6880d2cfb305daa0b1a29c16b91f531a18918a6e0f3"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] +aws = ["s3fs (>=2022.05.0)"] +clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] +compression = ["zstandard (>=0.17.0)"] +computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +consortium-standard = 
["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2022.05.0)"] +gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] +hdf5 = ["tables (>=3.7.0)"] +html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] +mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] +spss = ["pyreadstat (>=1.1.5)"] +sql-other = ["SQLAlchemy (>=1.4.36)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.8.0)"] + +[[package]] +name = "pathspec" +version = "0.11.2" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.7" +files = [ {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, ] -pbr = [ + +[[package]] +name = "pbr" +version = "5.11.1" +description = "Python Build Reasonableness" +optional = false +python-versions = ">=2.6" +files = [ {file = "pbr-5.11.1-py2.py3-none-any.whl", hash = "sha256:567f09558bae2b3ab53cb3c1e2e33e726ff3338e7bae3db5dc954b3a44eef12b"}, {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, ] -pep8-naming = [ + +[[package]] +name = "pep8-naming" +version = "0.13.3" +description = "Check PEP-8 naming conventions, plugin for flake8" +optional = false +python-versions = ">=3.7" +files = [ {file = "pep8-naming-0.13.3.tar.gz", hash = "sha256:1705f046dfcd851378aac3be1cd1551c7c1e5ff363bacad707d43007877fa971"}, {file = "pep8_naming-0.13.3-py3-none-any.whl", hash = "sha256:1a86b8c71a03337c97181917e2b472f0f5e4ccb06844a0d6f0a33522549e7a80"}, ] -platformdirs = [ + +[package.dependencies] +flake8 = ">=5.0.0" + +[[package]] +name = "platformdirs" +version = "3.10.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "platformdirs-3.10.0-py3-none-any.whl", hash = "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d"}, {file = "platformdirs-3.10.0.tar.gz", hash = "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d"}, ] -pluggy = [ + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] + +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, ] -pre-commit = [ + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pre-commit" +version = "3.4.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.8" +files = [ {file = "pre_commit-3.4.0-py2.py3-none-any.whl", hash = "sha256:96d529a951f8b677f730a7212442027e8ba53f9b04d217c4c67dc56c393ad945"}, {file = "pre_commit-3.4.0.tar.gz", hash = "sha256:6bbd5129a64cad4c0dfaeeb12cd8f7ea7e15b77028d985341478c8af3c759522"}, ] -pre-commit-hooks = [ + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + +[[package]] +name = "pre-commit-hooks" +version = "4.4.0" +description = "Some out-of-the-box hooks for pre-commit." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "pre_commit_hooks-4.4.0-py2.py3-none-any.whl", hash = "sha256:fc8837335476221ccccda3d176ed6ae29fe58753ce7e8b7863f5d0f987328fc6"}, {file = "pre_commit_hooks-4.4.0.tar.gz", hash = "sha256:7011eed8e1a25cde94693da009cba76392194cecc2f3f06c51a44ea6ad6c2af9"}, ] -pyarrow = [ - {file = "pyarrow-13.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:1afcc2c33f31f6fb25c92d50a86b7a9f076d38acbcb6f9e74349636109550148"}, - {file = "pyarrow-13.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70fa38cdc66b2fc1349a082987f2b499d51d072faaa6b600f71931150de2e0e3"}, - {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd57b13a6466822498238877892a9b287b0a58c2e81e4bdb0b596dbb151cbb73"}, - {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ce69f7bf01de2e2764e14df45b8404fc6f1a5ed9871e8e08a12169f87b7a26"}, - {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:588f0d2da6cf1b1680974d63be09a6530fd1bd825dc87f76e162404779a157dc"}, - {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6241afd72b628787b4abea39e238e3ff9f34165273fad306c7acf780dd850956"}, - {file = "pyarrow-13.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:fda7857e35993673fcda603c07d43889fca60a5b254052a462653f8656c64f44"}, - {file = "pyarrow-13.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:aac0ae0146a9bfa5e12d87dda89d9ef7c57a96210b899459fc2f785303dcbb67"}, - {file = "pyarrow-13.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d7759994217c86c161c6a8060509cfdf782b952163569606bb373828afdd82e8"}, - {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868a073fd0ff6468ae7d869b5fc1f54de5c4255b37f44fb890385eb68b68f95d"}, - {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51be67e29f3cfcde263a113c28e96aa04362ed8229cb7c6e5f5c719003659d33"}, - {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d1b4e7176443d12610874bb84d0060bf080f000ea9ed7c84b2801df851320295"}, - {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:69b6f9a089d116a82c3ed819eea8fe67dae6105f0d81eaf0fdd5e60d0c6e0944"}, - {file = "pyarrow-13.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ab1268db81aeb241200e321e220e7cd769762f386f92f61b898352dd27e402ce"}, - {file = "pyarrow-13.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ee7490f0f3f16a6c38f8c680949551053c8194e68de5046e6c288e396dccee80"}, - {file = "pyarrow-13.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3ad79455c197a36eefbd90ad4aa832bece7f830a64396c15c61a0985e337287"}, - {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68fcd2dc1b7d9310b29a15949cdd0cb9bc34b6de767aff979ebf546020bf0ba0"}, - {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc6fd330fd574c51d10638e63c0d00ab456498fc804c9d01f2a61b9264f2c5b2"}, - {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:e66442e084979a97bb66939e18f7b8709e4ac5f887e636aba29486ffbf373763"}, - {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:0f6eff839a9e40e9c5610d3ff8c5bdd2f10303408312caf4c8003285d0b49565"}, - {file = "pyarrow-13.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b30a27f1cddf5c6efcb67e598d7823a1e253d743d92ac32ec1eb4b6a1417867"}, - {file = 
"pyarrow-13.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:09552dad5cf3de2dc0aba1c7c4b470754c69bd821f5faafc3d774bedc3b04bb7"}, - {file = "pyarrow-13.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3896ae6c205d73ad192d2fc1489cd0edfab9f12867c85b4c277af4d37383c18c"}, - {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6647444b21cb5e68b593b970b2a9a07748dd74ea457c7dadaa15fd469c48ada1"}, - {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47663efc9c395e31d09c6aacfa860f4473815ad6804311c5433f7085415d62a7"}, - {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b9ba6b6d34bd2563345488cf444510588ea42ad5613df3b3509f48eb80250afd"}, - {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:d00d374a5625beeb448a7fa23060df79adb596074beb3ddc1838adb647b6ef09"}, - {file = "pyarrow-13.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:c51afd87c35c8331b56f796eff954b9c7f8d4b7fef5903daf4e05fcf017d23a8"}, - {file = "pyarrow-13.0.0.tar.gz", hash = "sha256:83333726e83ed44b0ac94d8d7a21bbdee4a05029c3b1e8db58a863eec8fd8a33"}, -] -pycodestyle = [ + +[package.dependencies] +"ruamel.yaml" = ">=0.15" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} + +[[package]] +name = "pycodestyle" +version = "2.11.0" +description = "Python style guide checker" +optional = false +python-versions = ">=3.8" +files = [ {file = "pycodestyle-2.11.0-py2.py3-none-any.whl", hash = "sha256:5d1013ba8dc7895b548be5afb05740ca82454fd899971563d2ef625d090326f8"}, {file = "pycodestyle-2.11.0.tar.gz", hash = "sha256:259bcc17857d8a8b3b4a2327324b79e5f020a13c16074670f9c8c8f872ea76d0"}, ] -pydocstyle = [ + +[[package]] +name = "pydocstyle" +version = "6.3.0" +description = "Python docstring style checker" +optional = false +python-versions = ">=3.6" +files = [ {file = "pydocstyle-6.3.0-py3-none-any.whl", hash = "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019"}, {file = "pydocstyle-6.3.0.tar.gz", hash = "sha256:7ce43f0c0ac87b07494eb9c0b462c0b73e6ff276807f204d6b53edc72b7e44e1"}, ] -pyflakes = [ + +[package.dependencies] +snowballstemmer = ">=2.2.0" + +[package.extras] +toml = ["tomli (>=1.2.3)"] + +[[package]] +name = "pyflakes" +version = "3.1.0" +description = "passive checker of Python programs" +optional = false +python-versions = ">=3.8" +files = [ {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, ] -pygments = [ + +[[package]] +name = "pygments" +version = "2.16.1" +description = "Pygments is a syntax highlighting package written in Python." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, ] -pyparsing = [ + +[package.extras] +plugins = ["importlib-metadata"] + +[[package]] +name = "pyparsing" +version = "3.1.1" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, ] -pytest = [ + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "pytest" +version = "7.4.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ {file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"}, {file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"}, ] -python-dateutil = [ + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] -python-dotenv = [ + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-dotenv" +version = "0.20.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.5" +files = [ {file = "python-dotenv-0.20.0.tar.gz", hash = "sha256:b7e3b04a59693c42c36f9ab1cc2acc46fa5df8c78e178fc33a8d4cd05c8d498f"}, {file = "python_dotenv-0.20.0-py3-none-any.whl", hash = "sha256:d92a187be61fe482e4fd675b6d52200e7be63a12b724abbf931a40ce4fa92938"}, ] -pytorch-lightning = [ + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "pytorch-lightning" +version = "2.0.8" +description = "PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate." 
+optional = false +python-versions = ">=3.8" +files = [ {file = "pytorch-lightning-2.0.8.tar.gz", hash = "sha256:fb7e8fbe473158b2c1666b6b31fb996c8aa2c3f5e8e2a54a3f50a7b5b2d00a20"}, {file = "pytorch_lightning-2.0.8-py3-none-any.whl", hash = "sha256:718d11f22551d95ef38614b4727433553c95ea2b50cf843938fb13baf34325a6"}, ] -pytz = [ + +[package.dependencies] +fsspec = {version = ">2021.06.0", extras = ["http"]} +lightning-utilities = ">=0.7.0" +numpy = ">=1.17.2" +packaging = ">=17.1" +PyYAML = ">=5.4" +torch = ">=1.11.0" +torchmetrics = ">=0.7.0" +tqdm = ">=4.57.0" +typing-extensions = ">=4.0.0" + +[package.extras] +all = ["deepspeed (>=0.8.2)", "gym[classic-control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.14.1)", "jsonargparse[signatures] (>=4.18.0,<4.23.0)", "lightning-utilities (>=0.7.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "rich (>=12.3.0)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.12.0)"] +deepspeed = ["deepspeed (>=0.8.2)"] +dev = ["cloudpickle (>=1.3)", "coverage (==7.3.0)", "deepspeed (>=0.8.2)", "fastapi (<0.100.0)", "gym[classic-control] (>=0.17.0)", "hydra-core (>=1.0.5)", "ipython[all] (<8.14.1)", "jsonargparse[signatures] (>=4.18.0,<4.23.0)", "lightning-utilities (>=0.7.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "onnx (<1.15.0)", "onnxruntime (<1.16.0)", "pandas (>1.0)", "protobuf (<=3.20.1)", "psutil (<5.9.6)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-forked (==1.4.0)", "pytest-rerunfailures (==10.3)", "rich (>=12.3.0)", "scikit-learn (>0.22.1)", "tensorboard (>=2.9.1)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchvision (>=0.12.0)", "uvicorn (<0.23.3)"] +examples = ["gym[classic-control] (>=0.17.0)", "ipython[all] (<8.14.1)", "lightning-utilities (>=0.7.0)", "torchmetrics (>=0.10.0)", "torchvision (>=0.12.0)"] +extra = ["hydra-core (>=1.0.5)", "jsonargparse[signatures] (>=4.18.0,<4.23.0)", "matplotlib (>3.1)", "omegaconf (>=2.0.5)", "rich (>=12.3.0)", "tensorboardX (>=2.2)"] +strategies = ["deepspeed (>=0.8.2)"] +test = ["cloudpickle (>=1.3)", "coverage (==7.3.0)", "fastapi (<0.100.0)", "onnx (<1.15.0)", "onnxruntime (<1.16.0)", "pandas (>1.0)", "protobuf (<=3.20.1)", "psutil (<5.9.6)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-forked (==1.4.0)", "pytest-rerunfailures (==10.3)", "scikit-learn (>0.22.1)", "tensorboard (>=2.9.1)", "uvicorn (<0.23.3)"] + +[[package]] +name = "pytz" +version = "2023.3.post1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] -pyupgrade = [ + +[[package]] +name = "pyupgrade" +version = "3.10.1" +description = "A tool to automatically upgrade syntax for newer versions." 
+optional = false +python-versions = ">=3.8.1" +files = [ {file = "pyupgrade-3.10.1-py2.py3-none-any.whl", hash = "sha256:f565b4d26daa46ed522e98746834e77e444269103f8bc04413d77dad95169a24"}, {file = "pyupgrade-3.10.1.tar.gz", hash = "sha256:1d8d138c2ccdd3c42b1419230ae036d5607dc69465a26feacc069642fc8d1b90"}, ] -pyyaml = [ + +[package.dependencies] +tokenize-rt = ">=5.2.0" + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, @@ -2650,7 +1865,14 @@ pyyaml = [ {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] -regex = [ + +[[package]] +name = "regex" +version = "2023.8.8" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.6" +files = [ {file = "regex-2023.8.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:88900f521c645f784260a8d346e12a1590f79e96403971241e64c3a265c8ecdb"}, {file = "regex-2023.8.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3611576aff55918af2697410ff0293d6071b7e00f4b09e005d614686ac4cd57c"}, {file = "regex-2023.8.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8a0ccc8f2698f120e9e5742f4b38dc944c38744d4bdfc427616f3a163dd9de5"}, @@ -2740,22 +1962,84 @@ regex = [ {file = "regex-2023.8.8-cp39-cp39-win_amd64.whl", hash = "sha256:5543c055d8ec7801901e1193a51570643d6a6ab8751b1f7dd9af71af467538bb"}, {file = "regex-2023.8.8.tar.gz", hash = "sha256:fcbdc5f2b0f1cd0f6a56cdb46fe41d2cce1e644e3b68832f3eeebc5fb0f7712e"}, ] -requests = [ + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, ] -restructuredtext-lint = [ + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "restructuredtext-lint" +version = "1.4.0" +description = "reStructuredText linter" +optional = false +python-versions = "*" +files = [ {file = "restructuredtext_lint-1.4.0.tar.gz", hash = "sha256:1b235c0c922341ab6c530390892eb9e92f90b9b75046063e047cacfb0f050c45"}, ] -rich = [ + +[package.dependencies] +docutils = ">=0.11,<1.0" + +[[package]] +name = "rich" +version = "13.5.2" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"}, {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"}, ] -"ruamel.yaml" = [ + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "ruamel.yaml" +version = "0.17.32" +description = "ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order" +optional = false +python-versions = ">=3" +files = [ {file = "ruamel.yaml-0.17.32-py3-none-any.whl", hash = "sha256:23cd2ed620231677564646b0c6a89d138b6822a0d78656df7abda5879ec4f447"}, {file = "ruamel.yaml-0.17.32.tar.gz", hash = "sha256:ec939063761914e14542972a5cba6d33c23b0859ab6342f61cf070cfc600efc2"}, ] -"ruamel.yaml.clib" = [ + +[package.dependencies] +"ruamel.yaml.clib" = {version = ">=0.2.7", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.12\""} + +[package.extras] +docs = ["ryd"] +jinja2 = ["ruamel.yaml.jinja2 (>=0.2)"] + +[[package]] +name = "ruamel.yaml.clib" +version = "0.2.7" +description = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml" +optional = false +python-versions = ">=3.5" +files = [ {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d5859983f26d8cd7bb5c287ef452e8aacc86501487634573d260968f753e1d71"}, {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:debc87a9516b237d0466a711b18b6ebeb17ba9f391eb7f91c649c5c4ec5006c7"}, {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:df5828871e6648db72d1c19b4bd24819b80a755c4541d3409f0f7acd0f335c80"}, @@ -2763,8 +2047,11 @@ rich = [ {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-win32.whl", hash = "sha256:763d65baa3b952479c4e972669f679fe490eee058d5aa85da483ebae2009d231"}, {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-win_amd64.whl", hash = "sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a"}, {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:045e0626baf1c52e5527bd5db361bc83180faaba2ff586e763d3d5982a876a9e"}, - {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_12_6_arm64.whl", hash = 
"sha256:721bc4ba4525f53f6a611ec0967bdcee61b31df5a56801281027a3a6d1c2daf5"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:1a6391a7cabb7641c32517539ca42cf84b87b667bad38b78d4d42dd23e957c81"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9c7617df90c1365638916b98cdd9be833d31d337dbcd722485597b43c4a215bf"}, {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:41d0f1fa4c6830176eef5b276af04c89320ea616655d01327d5ce65e50575c94"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-win32.whl", hash = "sha256:f6d3d39611ac2e4f62c3128a9eed45f19a6608670c5a2f4f07f24e8de3441d38"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:da538167284de58a52109a9b89b8f6a53ff8437dd6dc26d33b57bf6699153122"}, {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4b3a93bb9bc662fc1f99c5c3ea8e623d8b23ad22f861eb6fce9377ac07ad6072"}, {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_12_0_arm64.whl", hash = "sha256:a234a20ae07e8469da311e182e70ef6b199d0fbeb6c6cc2901204dd87fb867e8"}, {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:15910ef4f3e537eea7fe45f8a5d19997479940d9196f357152a09031c5be59f3"}, @@ -2791,7 +2078,14 @@ rich = [ {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:184faeaec61dbaa3cace407cffc5819f7b977e75360e8d5ca19461cd851a5fc5"}, {file = "ruamel.yaml.clib-0.2.7.tar.gz", hash = "sha256:1f08fd5a2bea9c4180db71678e850b995d2a5f4537be0e94557668cf0f5f9497"}, ] -safetensors = [ + +[[package]] +name = "safetensors" +version = "0.3.3.post1" +description = "" +optional = false +python-versions = ">=3.7" +files = [ {file = "safetensors-0.3.3.post1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:4b73107dab584b1cc7fef55da7b020317b81c7265ce5228e1e130c1675cf90f8"}, {file = "safetensors-0.3.3.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0f976ccaad239c7c1f5010e8b939a8c9f9301313b0039626e85f82e9dd83826c"}, {file = "safetensors-0.3.3.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ab37346d17f1e76df2bc4ee8cba08980bf3278ee8893c28cf9713163f40722"}, @@ -2891,83 +2185,350 @@ safetensors = [ {file = "safetensors-0.3.3.post1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dda5d7f65009359d27c7413edded809da9c88220ef13478649a2e52c6a8cd718"}, {file = "safetensors-0.3.3.post1.tar.gz", hash = "sha256:d1f47d64700438a93e5f8149c6bc5bd66eaf286b8f2900f103482eaf392295fe"}, ] -safety = [ + +[package.extras] +all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] +dev = ["safetensors[all]"] +jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"] +numpy = ["numpy (>=1.21.6)"] +paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] +pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] +quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] +tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] +testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"] +torch = ["safetensors[numpy]", "torch (>=1.10)"] + +[[package]] +name = "safety" +version = "2.3.5" +description = "Checks installed dependencies for 
known vulnerabilities and licenses." +optional = false +python-versions = "*" +files = [ {file = "safety-2.3.5-py3-none-any.whl", hash = "sha256:2227fcac1b22b53c1615af78872b48348661691450aa25d6704a5504dbd1f7e2"}, {file = "safety-2.3.5.tar.gz", hash = "sha256:a60c11f8952f412cbb165d70cb1f673a3b43a2ba9a93ce11f97e6a4de834aa3a"}, ] -sh = [ + +[package.dependencies] +Click = ">=8.0.2" +dparse = ">=0.6.2" +packaging = ">=21.0,<22.0" +requests = "*" +"ruamel.yaml" = ">=0.17.21" +setuptools = ">=19.3" + +[package.extras] +github = ["jinja2 (>=3.1.0)", "pygithub (>=1.43.3)"] +gitlab = ["python-gitlab (>=1.3.0)"] + +[[package]] +name = "setuptools" +version = "68.2.2" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-68.2.2-py3-none-any.whl", hash = "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a"}, + {file = "setuptools-68.2.2.tar.gz", hash = "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "sh" +version = "2.0.6" +description = "Python subprocess replacement" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ {file = "sh-2.0.6-py3-none-any.whl", hash = "sha256:ced8f2e081a858b66a46ace3703dec243779abbd5a1887ba7e3c34f34da70cd2"}, {file = "sh-2.0.6.tar.gz", hash = "sha256:9b2998f313f201c777e2c0061f0b1367497097ef13388595be147e2a00bf7ba1"}, ] -six = [ + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] -smmap = [ + +[[package]] +name = "smmap" +version = "5.0.0" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.6" +files = [ {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"}, {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"}, ] -snowballstemmer = [ + +[[package]] +name = "snowballstemmer" +version = "2.2.0" +description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
+optional = false +python-versions = "*" +files = [ {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, ] -soupsieve = [ + +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, ] -sphinx = [ + +[[package]] +name = "sphinx" +version = "7.2.5" +description = "Python documentation generator" +optional = false +python-versions = ">=3.9" +files = [ {file = "sphinx-7.2.5-py3-none-any.whl", hash = "sha256:9269f9ed2821c9ebd30e4204f5c2339f5d4980e377bc89cb2cb6f9b17409c20a"}, {file = "sphinx-7.2.5.tar.gz", hash = "sha256:1a9290001b75c497fd087e92b0334f1bbfa1a1ae7fddc084990c4b7bd1130b88"}, ] -sphinx-autobuild = [ + +[package.dependencies] +alabaster = ">=0.7,<0.8" +babel = ">=2.9" +colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} +docutils = ">=0.18.1,<0.21" +imagesize = ">=1.3" +importlib-metadata = {version = ">=4.8", markers = "python_version < \"3.10\""} +Jinja2 = ">=3.0" +packaging = ">=21.0" +Pygments = ">=2.14" +requests = ">=2.25.0" +snowballstemmer = ">=2.0" +sphinxcontrib-applehelp = "*" +sphinxcontrib-devhelp = "*" +sphinxcontrib-htmlhelp = ">=2.0.0" +sphinxcontrib-jsmath = "*" +sphinxcontrib-qthelp = "*" +sphinxcontrib-serializinghtml = ">=1.1.9" + +[package.extras] +docs = ["sphinxcontrib-websupport"] +lint = ["docutils-stubs", "flake8 (>=3.5.0)", "flake8-simplify", "isort", "mypy (>=0.990)", "ruff", "sphinx-lint", "types-requests"] +test = ["cython (>=3.0)", "filelock", "html5lib", "pytest (>=4.6)", "setuptools (>=67.0)"] + +[[package]] +name = "sphinx-autobuild" +version = "2021.3.14" +description = "Rebuild Sphinx documentation on changes, with live-reload in the browser." +optional = false +python-versions = ">=3.6" +files = [ {file = "sphinx-autobuild-2021.3.14.tar.gz", hash = "sha256:de1ca3b66e271d2b5b5140c35034c89e47f263f2cd5db302c9217065f7443f05"}, {file = "sphinx_autobuild-2021.3.14-py3-none-any.whl", hash = "sha256:8fe8cbfdb75db04475232f05187c776f46f6e9e04cacf1e49ce81bdac649ccac"}, ] -sphinx-basic-ng = [ + +[package.dependencies] +colorama = "*" +livereload = "*" +sphinx = "*" + +[package.extras] +test = ["pytest", "pytest-cov"] + +[[package]] +name = "sphinx-basic-ng" +version = "1.0.0b2" +description = "A modern skeleton for Sphinx themes." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "sphinx_basic_ng-1.0.0b2-py3-none-any.whl", hash = "sha256:eb09aedbabfb650607e9b4b68c9d240b90b1e1be221d6ad71d61c52e29f7932b"}, {file = "sphinx_basic_ng-1.0.0b2.tar.gz", hash = "sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9"}, ] -sphinx-click = [ + +[package.dependencies] +sphinx = ">=4.0" + +[package.extras] +docs = ["furo", "ipython", "myst-parser", "sphinx-copybutton", "sphinx-inline-tabs"] + +[[package]] +name = "sphinx-click" +version = "5.0.1" +description = "Sphinx extension that automatically documents click applications" +optional = false +python-versions = ">=3.8" +files = [ {file = "sphinx-click-5.0.1.tar.gz", hash = "sha256:fcc7df15e56e3ff17ebf446cdd316c2eb79580b37c49579fba11e5468802ef25"}, {file = "sphinx_click-5.0.1-py3-none-any.whl", hash = "sha256:31836ca22f746d3c26cbfdfe0c58edf0bca5783731a0b2e25bb6d59800bb75a1"}, ] -sphinxcontrib-applehelp = [ + +[package.dependencies] +click = ">=7.0" +docutils = "*" +sphinx = ">=2.0" + +[[package]] +name = "sphinxcontrib-applehelp" +version = "1.0.7" +description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" +optional = false +python-versions = ">=3.9" +files = [ {file = "sphinxcontrib_applehelp-1.0.7-py3-none-any.whl", hash = "sha256:094c4d56209d1734e7d252f6e0b3ccc090bd52ee56807a5d9315b19c122ab15d"}, {file = "sphinxcontrib_applehelp-1.0.7.tar.gz", hash = "sha256:39fdc8d762d33b01a7d8f026a3b7d71563ea3b72787d5f00ad8465bd9d6dfbfa"}, ] -sphinxcontrib-devhelp = [ + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "1.0.5" +description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents" +optional = false +python-versions = ">=3.9" +files = [ {file = "sphinxcontrib_devhelp-1.0.5-py3-none-any.whl", hash = "sha256:fe8009aed765188f08fcaadbb3ea0d90ce8ae2d76710b7e29ea7d047177dae2f"}, {file = "sphinxcontrib_devhelp-1.0.5.tar.gz", hash = "sha256:63b41e0d38207ca40ebbeabcf4d8e51f76c03e78cd61abe118cf4435c73d4212"}, ] -sphinxcontrib-htmlhelp = [ + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.0.4" +description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" +optional = false +python-versions = ">=3.9" +files = [ {file = "sphinxcontrib_htmlhelp-2.0.4-py3-none-any.whl", hash = "sha256:8001661c077a73c29beaf4a79968d0726103c5605e27db92b9ebed8bab1359e9"}, {file = "sphinxcontrib_htmlhelp-2.0.4.tar.gz", hash = "sha256:6c26a118a05b76000738429b724a0568dbde5b72391a688577da08f11891092a"}, ] -sphinxcontrib-jsmath = [ + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["html5lib", "pytest"] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +description = "A sphinx extension which renders display math in HTML via JavaScript" +optional = false +python-versions = ">=3.5" +files = [ {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, ] -sphinxcontrib-qthelp = [ + +[package.extras] +test = ["flake8", "mypy", "pytest"] + +[[package]] 
+name = "sphinxcontrib-qthelp" +version = "1.0.6" +description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents" +optional = false +python-versions = ">=3.9" +files = [ {file = "sphinxcontrib_qthelp-1.0.6-py3-none-any.whl", hash = "sha256:bf76886ee7470b934e363da7a954ea2825650013d367728588732c7350f49ea4"}, {file = "sphinxcontrib_qthelp-1.0.6.tar.gz", hash = "sha256:62b9d1a186ab7f5ee3356d906f648cacb7a6bdb94d201ee7adf26db55092982d"}, ] -sphinxcontrib-serializinghtml = [ + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "1.1.9" +description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" +optional = false +python-versions = ">=3.9" +files = [ {file = "sphinxcontrib_serializinghtml-1.1.9-py3-none-any.whl", hash = "sha256:9b36e503703ff04f20e9675771df105e58aa029cfcbc23b8ed716019b7416ae1"}, {file = "sphinxcontrib_serializinghtml-1.1.9.tar.gz", hash = "sha256:0c64ff898339e1fac29abd2bf5f11078f3ec413cfe9c046d3120d7ca65530b54"}, ] -stevedore = [ + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "stevedore" +version = "5.1.0" +description = "Manage dynamic plugins for Python applications" +optional = false +python-versions = ">=3.8" +files = [ {file = "stevedore-5.1.0-py3-none-any.whl", hash = "sha256:8cc040628f3cea5d7128f2e76cf486b2251a4e543c7b938f58d9a377f6694a2d"}, {file = "stevedore-5.1.0.tar.gz", hash = "sha256:a54534acf9b89bc7ed264807013b505bf07f74dbe4bcfa37d32bd063870b087c"}, ] -sympy = [ + +[package.dependencies] +pbr = ">=2.0.0,<2.1.0 || >2.1.0" + +[[package]] +name = "sympy" +version = "1.12" +description = "Computer algebra system (CAS) in Python" +optional = false +python-versions = ">=3.8" +files = [ {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, ] -tokenize-rt = [ + +[package.dependencies] +mpmath = ">=0.19" + +[[package]] +name = "tokenize-rt" +version = "5.2.0" +description = "A wrapper around the stdlib `tokenize` which roundtrips." 
+optional = false +python-versions = ">=3.8" +files = [ {file = "tokenize_rt-5.2.0-py2.py3-none-any.whl", hash = "sha256:b79d41a65cfec71285433511b50271b05da3584a1da144a0752e9c621a285289"}, {file = "tokenize_rt-5.2.0.tar.gz", hash = "sha256:9fe80f8a5c1edad2d3ede0f37481cc0cc1538a2f442c9c2f9e4feacd2792d054"}, ] -tokenizers = [ + +[[package]] +name = "tokenizers" +version = "0.13.3" +description = "Fast and Customizable Tokenizers" +optional = false +python-versions = "*" +files = [ {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, @@ -3009,15 +2570,41 @@ tokenizers = [ {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, ] -tomli = [ + +[package.extras] +dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] -tomlkit = [ + +[[package]] +name = "tomlkit" +version = "0.12.1" +description = "Style preserving TOML library" +optional = false +python-versions = ">=3.7" +files = [ {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"}, {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, ] -torch = [ + +[[package]] +name = "torch" +version = "2.0.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = false +python-versions = ">=3.8.0" +files = [ {file = "torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:8ced00b3ba471856b993822508f77c98f48a458623596a4c43136158781e306a"}, {file = "torch-2.0.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:359bfaad94d1cda02ab775dc1cc386d585712329bb47b8741607ef6ef4950747"}, {file = "torch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7c84e44d9002182edd859f3400deaa7410f5ec948a519cc7ef512c2f9b34d2c4"}, @@ -3039,11 +2626,52 @@ torch = [ {file = "torch-2.0.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:c62df99352bd6ee5a5a8d1832452110435d178b5164de450831a3a8cc14dc680"}, {file = "torch-2.0.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:671a2565e3f63b8fe8e42ae3e36ad249fe5e567435ea27b94edaa672a7d0c416"}, ] -torchmetrics = [ + +[package.dependencies] +filelock = "*" +jinja2 = "*" +networkx = "*" +sympy = "*" +typing-extensions = "*" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] + +[[package]] +name = "torchmetrics" +version = "1.1.1" +description = "PyTorch native Metrics" +optional = false +python-versions = ">=3.8" +files = [ {file 
= "torchmetrics-1.1.1-py3-none-any.whl", hash = "sha256:903b4fc30537acfc5221505c48f7627e58dbf6d9dea85c16ea7b4323f9e13793"}, {file = "torchmetrics-1.1.1.tar.gz", hash = "sha256:65ea34205c0506eecfd06b98f63f4d2a2c5c0e17367cf324e1747adc854c80a5"}, ] -tornado = [ + +[package.dependencies] +lightning-utilities = ">=0.8.0" +numpy = ">1.20.0" +torch = ">=1.8.1" + +[package.extras] +all = ["SciencePlots (>=2.0.0)", "lpips (<=0.1.4)", "matplotlib (>=3.2.0)", "mypy (==1.5.1)", "nltk (>=3.6)", "piq (<=0.8.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "regex (>=2021.9.24)", "scipy (>1.0.0)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] +audio = ["pystoi (>=0.3.0)", "torchaudio (>=0.10.0)"] +detection = ["pycocotools (>2.0.0)", "torchvision (>=0.8)"] +dev = ["SciencePlots (>=2.0.0)", "bert-score (==0.3.13)", "cloudpickle (>1.3)", "coverage (==7.3.0)", "dython (<=0.7.4)", "fairlearn", "fast-bss-eval (>=0.1.0)", "fire (<=0.5.0)", "huggingface-hub (<0.16)", "jiwer (>=2.3.0)", "kornia (>=0.6.7)", "lpips (<=0.1.4)", "matplotlib (>=3.2.0)", "mir-eval (>=0.6)", "mypy (==1.5.1)", "netcal (>1.0.0)", "nltk (>=3.6)", "numpy (<1.25.0)", "pandas (>1.0.0)", "pandas (>=1.4.0)", "phmdoctest (==1.4.0)", "piq (<=0.8.0)", "psutil (<=5.9.5)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-doctestplus (==1.0.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "pytorch-msssim (==1.0.0)", "regex (>=2021.9.24)", "requests (<=2.31.0)", "rouge-score (>0.1.0)", "sacrebleu (>=2.0.0)", "scikit-image (>=0.19.0)", "scikit-learn (>=1.1.1)", "scipy (>1.0.0)", "sewar (>=0.4.4)", "statsmodels (>0.13.5)", "torch-complex (<=0.4.3)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] +image = ["lpips (<=0.1.4)", "scipy (>1.0.0)", "torch-fidelity (<=0.4.0)", "torchvision (>=0.8)"] +multimodal = ["piq (<=0.8.0)", "transformers (>=4.10.0)"] +test = ["bert-score (==0.3.13)", "cloudpickle (>1.3)", "coverage (==7.3.0)", "dython (<=0.7.4)", "fairlearn", "fast-bss-eval (>=0.1.0)", "fire (<=0.5.0)", "huggingface-hub (<0.16)", "jiwer (>=2.3.0)", "kornia (>=0.6.7)", "mir-eval (>=0.6)", "netcal (>1.0.0)", "numpy (<1.25.0)", "pandas (>1.0.0)", "pandas (>=1.4.0)", "phmdoctest (==1.4.0)", "psutil (<=5.9.5)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-doctestplus (==1.0.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "pytorch-msssim (==1.0.0)", "requests (<=2.31.0)", "rouge-score (>0.1.0)", "sacrebleu (>=2.0.0)", "scikit-image (>=0.19.0)", "scikit-learn (>=1.1.1)", "scipy (>1.0.0)", "sewar (>=0.4.4)", "statsmodels (>0.13.5)", "torch-complex (<=0.4.3)"] +text = ["nltk (>=3.6)", "regex (>=2021.9.24)", "tqdm (>=4.41.0)", "transformers (>4.4.0)"] +typing = ["mypy (==1.5.1)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] +visual = ["SciencePlots (>=2.0.0)", "matplotlib (>=3.2.0)"] + +[[package]] +name = "tornado" +version = "6.3.3" +description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
+optional = false +python-versions = ">= 3.8" +files = [ {file = "tornado-6.3.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:502fba735c84450974fec147340016ad928d29f1e91f49be168c0a4c18181e1d"}, {file = "tornado-6.3.3-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:805d507b1f588320c26f7f097108eb4023bbaa984d63176d1652e184ba24270a"}, {file = "tornado-6.3.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bd19ca6c16882e4d37368e0152f99c099bad93e0950ce55e71daed74045908f"}, @@ -3056,134 +2684,235 @@ tornado = [ {file = "tornado-6.3.3-cp38-abi3-win_amd64.whl", hash = "sha256:22d3c2fa10b5793da13c807e6fc38ff49a4f6e1e3868b0a6f4164768bb8e20f5"}, {file = "tornado-6.3.3.tar.gz", hash = "sha256:e7d8db41c0181c80d76c982aacc442c0783a2c54d6400fe028954201a2e032fe"}, ] -tqdm = [ + +[[package]] +name = "tqdm" +version = "4.66.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, ] -transformers = [ + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "transformers" +version = "4.33.1" +description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +optional = false +python-versions = ">=3.8.0" +files = [ {file = "transformers-4.33.1-py3-none-any.whl", hash = "sha256:0630c2d26448d7c6cb78435e6c43910c89e99387badea6be1f565ffa3f093f1d"}, {file = "transformers-4.33.1.tar.gz", hash = "sha256:744265e9f0724d22c229938f28376af54abce730ef647f35bd1685abf49912a4"}, ] -typeguard = [ + +[package.dependencies] +filelock = "*" +huggingface-hub = ">=0.15.1,<1.0" +numpy = ">=1.17" +packaging = ">=20.0" +pyyaml = ">=5.1" +regex = "!=2019.12.17" +requests = "*" +safetensors = ">=0.3.1" +tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.14" +tqdm = ">=4.27" + +[package.extras] +accelerate = ["accelerate (>=0.20.3)"] +agents = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.10,!=1.12.0)"] +all = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +codecarbon = ["codecarbon (==1.2.0)"] +deepspeed = ["accelerate (>=0.20.3)", "deepspeed (>=0.9.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", 
"pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +docs = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax 
(>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] +docs-specific = ["hf-doc-builder"] +fairscale = ["fairscale (>0.3)"] +flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"] +flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +ftfy = ["ftfy"] +integrations = ["optuna", "ray[tune]", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +modelcreation = ["cookiecutter (==1.7.3)"] +natten = ["natten (>=0.14.6)"] +onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +optuna = ["optuna"] +quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241,<=0.0.259)", "urllib3 (<2.0.0)"] +ray = ["ray[tune]"] +retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +sagemaker = ["sagemaker (>=2.31.0)"] +sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] +serving = ["fastapi", "pydantic (<2)", "starlette", "uvicorn"] +sigopt = ["sigopt"] +sklearn = ["scikit-learn"] +speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "timeout-decorator"] +tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"] +tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"] +tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +timm = ["timm"] +tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.14)"] +torch = ["accelerate (>=0.20.3)", "torch (>=1.10,!=1.12.0)"] +torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +torch-vision = ["Pillow (<10.0.0)", "torchvision"] +torchhub = ["filelock", "huggingface-hub (>=0.15.1,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "tqdm (>=4.27)"] +video = ["av (==9.2.0)", "decord (==0.6.0)"] +vision = ["Pillow (<10.0.0)"] + +[[package]] +name = "typeguard" +version = "4.1.3" +description = "Run-time type checker for Python" +optional = false +python-versions = ">=3.8" +files = [ {file = "typeguard-4.1.3-py3-none-any.whl", hash = "sha256:5b7453b1e3b35fcfe2d62fa4ec500d05e6f2f2eb46f4126ae964677fcc384fff"}, {file = "typeguard-4.1.3.tar.gz", hash = 
"sha256:7d4264cd631ac1157c5bb5ec992281b4f1e2ba7a35db91bc15f442235e244803"}, ] -types-requests = [ + +[package.dependencies] +importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} +typing-extensions = {version = ">=4.7.0", markers = "python_version < \"3.12\""} + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)"] +test = ["coverage[toml] (>=7)", "mypy (>=1.2.0)", "pytest (>=7)"] + +[[package]] +name = "types-requests" +version = "2.31.0.2" +description = "Typing stubs for requests" +optional = false +python-versions = "*" +files = [ {file = "types-requests-2.31.0.2.tar.gz", hash = "sha256:6aa3f7faf0ea52d728bb18c0a0d1522d9bfd8c72d26ff6f61bfc3d06a411cf40"}, {file = "types_requests-2.31.0.2-py3-none-any.whl", hash = "sha256:56d181c85b5925cbc59f4489a57e72a8b2166f18273fd8ba7b6fe0c0b986f12a"}, ] -types-urllib3 = [ + +[package.dependencies] +types-urllib3 = "*" + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +description = "Typing stubs for urllib3" +optional = false +python-versions = "*" +files = [ {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, ] -typing-extensions = [ + +[[package]] +name = "typing-extensions" +version = "4.7.1" +description = "Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, ] -tzdata = [ + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, ] -urllib3 = [ + +[[package]] +name = "urllib3" +version = "2.0.4" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=3.7" +files = [ {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"}, {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"}, ] -virtualenv = [ + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "virtualenv" +version = "20.24.4" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ {file = "virtualenv-20.24.4-py3-none-any.whl", hash = "sha256:29c70bb9b88510f6414ac3e55c8b413a1f96239b6b789ca123437d5e892190cb"}, {file = "virtualenv-20.24.4.tar.gz", hash = "sha256:772b05bfda7ed3b8ecd16021ca9716273ad9f4467c801f27e83ac73430246dca"}, ] -xdoctest = [ + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<4" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + +[[package]] +name = "xdoctest" +version = "1.1.1" +description = "A rewrite of the builtin doctest module" +optional = false +python-versions = ">=3.6" +files = [ {file = "xdoctest-1.1.1-py3-none-any.whl", hash = "sha256:d59d4ed91cb92e4430ef0ad1b134a2bef02adff7d2fb9c9f057547bee44081a2"}, {file = "xdoctest-1.1.1.tar.gz", hash = "sha256:2eac8131bdcdf2781b4e5a62d6de87f044b730cc8db8af142a51bb29c245e779"}, ] -xxhash = [ - {file = "xxhash-3.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70ef7288d1cb1ad16e02d101ea43bb0e392d985d60b9b0035aee80663530960d"}, - {file = "xxhash-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:44ff8c673cab50be46784e0aec62aa6f0ca9ea765e2b0690e8945d0cd950dcaf"}, - {file = "xxhash-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfebc90273ae2beb813d8118a2bfffb5a5a81ac054fbfd061ea18fd0a81db0ac"}, - {file = "xxhash-3.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9084e68bedbd665c7e9241a7b597c28f4775edeb3941bf608ecb38732a5f8fb5"}, - {file = "xxhash-3.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d72493a14a3e89564b1a6c7400b9b40621e8f4692410706ef27c66aeadc7b431"}, - {file = "xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98779cbe9068dd7734cc3210693894d5cc9b156920e9c336f10fb99f46bebbd8"}, - {file = "xxhash-3.3.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:499f8a12767dd28b98ab6b7c7da7d294564e4c9024a2aaa5d0b0b98a8bef2f92"}, - {file = "xxhash-3.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4dabda7f42c548f98d8e07e390bda2953fc58302c0e07ded7b3fe0637e7ecd2f"}, - {file = "xxhash-3.3.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c416409646c793c46370f0f1859253302ee70aeda5278c2a0ca41462f8ec1244"}, - {file = 
"xxhash-3.3.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b8bd31aaad8a80a7302730676cec26bea3ef1fd9835875aa47fea073aca9fe05"}, - {file = "xxhash-3.3.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:3af8e3bcd630f905efbdfe7a51b51fc1ca3c9dca8b155f841925f3ad41685d41"}, - {file = "xxhash-3.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d86b79c707fc7025d967af71db652429a06a8179175e45bd2e9f17b8af6f5949"}, - {file = "xxhash-3.3.0-cp310-cp310-win32.whl", hash = "sha256:98fe771f36ee9d3a1f5741424a956a2ba9651d9508a9f64a024b57f2cf796414"}, - {file = "xxhash-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:0a65131f7f731ecf7e3dd27f09d877aff3000a79a446caaa2c0d8d0ec0bc7186"}, - {file = "xxhash-3.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a9761e425e79d23797fa0bec2d781dbadb9fe5dcc2bf69030855f5e393c3bec8"}, - {file = "xxhash-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d28c7ef1deb3c3ac5f5290176ca3d501daa97c2e1f7443bf5d8b61ac651794b2"}, - {file = "xxhash-3.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:701b7cefffc25de1b7ddfae6505da70a3b3a11e312c2e2b33b09e180bbceb43d"}, - {file = "xxhash-3.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b1644f8b8e19a242c3047a089541067248a651038cabb9fcab3c13eb1dfcd757"}, - {file = "xxhash-3.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20e7d0e3488cc0f0dbe360731b7fe32e1f2df46bf2de2db3317d301efb93084c"}, - {file = "xxhash-3.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:156c52eca2b20f9839723bef0b929a290f6c2f1c98ccb24e82f58f96f3c16007"}, - {file = "xxhash-3.3.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d6ce4d3828d79044ed08994e196c20f69c18133ed8a4286afe3e98989adeeac"}, - {file = "xxhash-3.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b85b63757ade2439c8d7d71842c40d42c0ab3b69279ed02afbd3b1635f7d2b4b"}, - {file = "xxhash-3.3.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b2b9051e40b7b649a9a2a38fb223ca6a593d332012df885746b81968948f9435"}, - {file = "xxhash-3.3.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:81b7ce050f26fc1daaaa0d24e320815306736d14608e1ba31920e693a7ca9afb"}, - {file = "xxhash-3.3.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:7442500fcce71669953ca959682dcd47452bc3f9c95c8d88315874aeabec9f82"}, - {file = "xxhash-3.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:36a05bf59a515cfb07f3f83373c527fff2ecaa77eaf30c968c788aea582070a1"}, - {file = "xxhash-3.3.0-cp311-cp311-win32.whl", hash = "sha256:da16f9cd62c6fde74683be1b28c28ef865e706da13e3bee4ba836fcc520de0cc"}, - {file = "xxhash-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:40fd49ef6964b1c90c0bea63cd184f6d0b36e59144a080e8b3ac2c4c06bf6bf2"}, - {file = "xxhash-3.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:672c60cce1f8026ae32c651f877aa64f342876083a36a4b1ff91bc876aaf0e34"}, - {file = "xxhash-3.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bb6c83d7a65dd3065566c77425ba72df96982174e8ef613d809052d68ae77ab"}, - {file = "xxhash-3.3.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a4170f3016b621e3200ebfcc18de6f50eb8e8fc1303e16324b1f5625afd51b57"}, - {file = "xxhash-3.3.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bfb9c45d502ab38c0f4edf98a678694ae0f345613ef4900ade98c71f64db4d78"}, - {file = 
"xxhash-3.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48af026a2b1569666da42a478248a1f03f4e2350a34eb661afe3cb45429ca1d7"}, - {file = "xxhash-3.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe627de8fe8ddfa8b6477bda4ae5d5843ad1a0c83601dcff72247039465cc901"}, - {file = "xxhash-3.3.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:427fc60a188e345534f35b0aa76f7640c5ddf0354f1c9ad826a2bc086282982d"}, - {file = "xxhash-3.3.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:d80acb20c7f268fe3150ac0be6a6b798062af56a1795eef855b26c9eae11a99c"}, - {file = "xxhash-3.3.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:e71100818943422d1fbbe460e7be7fc4f2d2ba9371b2a745eb09e29ef0493f4a"}, - {file = "xxhash-3.3.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:e3b9bb5fdbe284c7b61c5d82c76688e52bbaf48ab1e53de98c072cc696fa331f"}, - {file = "xxhash-3.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1e25f6c8c46cf1ed8237f610abb231093a748c97d6c2c092789a7cad7e7ef290"}, - {file = "xxhash-3.3.0-cp37-cp37m-win32.whl", hash = "sha256:928208dfecc563be59ae91868d1658e78809cb1e6a0bd74960a96c915db6390c"}, - {file = "xxhash-3.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:bd1b4531a66da6dde1974662c1fd6fb1a2f27e40542e3df5e5e5dbab8ea4aee7"}, - {file = "xxhash-3.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:deebb296df92e082b6d0171a7d6227b503e2897cea4f8bdd3d708094974d4cf6"}, - {file = "xxhash-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cd96e9cb0e2baa294e6d572207d9731c3bb8e2511f1ff70f2bf17266b4488bd9"}, - {file = "xxhash-3.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3756b44bf247e422a2e47a38f25d03cf4a5ed539fdc2be3c60043e872e6ff13d"}, - {file = "xxhash-3.3.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69550c3c053b8f135ceac97b85dc1b2bc54b7613a966f550f32b43bed81c788a"}, - {file = "xxhash-3.3.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fc8736fc3e0c5aad435520873b9d2e27ddcc5a830b07e00e9c4d3a61ded9675"}, - {file = "xxhash-3.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80ead7774392efbd95f9f701155048f9ca26cf55133db6f5bb5a0ec69376bda5"}, - {file = "xxhash-3.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8737c9b3fd944d856faafa92c95f6198649ad57987935b6d965d086938be917"}, - {file = "xxhash-3.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2c8e078d0b9f85212801c41bd9eec8122003929686b0ee33360ffbfdf1a189ab"}, - {file = "xxhash-3.3.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f399269d20ef1dd910331f9ad49e8510c3ba2aa657b623293b536038f266a5c5"}, - {file = "xxhash-3.3.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f3661decef5f9ff7ab50edbef463bf7dc717621b56755dbae5458a946a033b10"}, - {file = "xxhash-3.3.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:5ec374d0f1e7d43ef48a4ff643600833d7a325ecc6933b4d6ad9282f55751cf7"}, - {file = "xxhash-3.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:39a947ff02d9a85673f5ce1f6f34059e24c714a797440485bd81b2c3cb69a7ff"}, - {file = "xxhash-3.3.0-cp38-cp38-win32.whl", hash = "sha256:4a4f0645a0ec03b229fb04f2e66bdbcb1ffd341a70d6c86c3ee015ffdcd70fad"}, - {file = "xxhash-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:8af5a687c0fb4357c230eec8a57ca07d3172faa3cb69beb0cbad40672ae6fa4b"}, - {file = "xxhash-3.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:e5bfafda019ecc6202af6f3cf08220fa66af9612ba16ef831033ae3ac7bd1f89"}, - {file = "xxhash-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d113b433bc817adf845689a051363777835577858263ec4325d1934fcb7e394"}, - {file = "xxhash-3.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56aacf4bf65f575c0392be958aceff719d850950bb6af7d804b32d4bc293159c"}, - {file = "xxhash-3.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f5d3e4e0937dad05585e9bd772bbdf0ca40cd8b2f54789d7a1f3091b608118c"}, - {file = "xxhash-3.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23605d7fc67bc7daa0d263b3a26de3375cfcc0b51ab7de5026625415c05b6fed"}, - {file = "xxhash-3.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe525be0392d493558a2b10d764bcaae9850cc262b417176a8b001f16e085fc6"}, - {file = "xxhash-3.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b234d08786884f5c8d55dfebb839cfbd846d812e3a052c39ca7e8ce7055fed68"}, - {file = "xxhash-3.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b031395b4b9c3085d9ea1ce89896ab01a65fc63172b2bfda5dd318fefe5e2f93"}, - {file = "xxhash-3.3.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:5afe44da46b48c75169e622a532dca3fe585343c0577cfd7c18ecd3f1200305d"}, - {file = "xxhash-3.3.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c59f233f38b6a49d5e4ddf16be910a5bbf36a2989b6b2c8591853fb9f5a5e691"}, - {file = "xxhash-3.3.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:ed016e278c5c4633270903c7cf3b9dfb0bd293b7335e43fe695cb95541da53c9"}, - {file = "xxhash-3.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a8bd6612fb35487e9ab329bb37b3df44f58baf752010dde9282593edbfed7e7"}, - {file = "xxhash-3.3.0-cp39-cp39-win32.whl", hash = "sha256:015a0498bde85364abc53fcc713af962dd4555391929736d9c0ff2c555436a03"}, - {file = "xxhash-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:06a484097af32caf1cfffadd60c3ca140c9e52b40a551fb1f6f0fdfd6f7f8977"}, - {file = "xxhash-3.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6c3809740124bbc777d29e3ae53de24f4c13fd5e62878086a8feadf0dcb654a5"}, - {file = "xxhash-3.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae092f0daaeece2acdd6ec46e2ab307d8d6f22b01ecca14dc6078844dbd88339"}, - {file = "xxhash-3.3.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3498e72ff2610b049b97bb81d1ea6e7bfa5b7a45efb3f255d77ec2fa2bc91653"}, - {file = "xxhash-3.3.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0004dded9d86f129961326e980420187640fb7ba65a184009429861c1d09df7"}, - {file = "xxhash-3.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:41c8bfd27191928bae6fd2b66872965532267785094a03c0ee5f358d9dba51c2"}, - {file = "xxhash-3.3.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:71db8498e329cef3588b0617f762a3fe31d899872e76a68ce2840e35a1318a5b"}, - {file = "xxhash-3.3.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d1d24d71b6209bc0124286932c4f0660c1103cb996fe34cb374bc12ac251940"}, - {file = "xxhash-3.3.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61004587a09b5b385e43d95ffe3a76c9d934dfd79ea38272d5c20ddfba8eab8f"}, - {file = "xxhash-3.3.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3f0c92e3fa826425c73acafb31e022a719c85423847a9433d3a9e61e4ac97543"}, - {file = "xxhash-3.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:367e03f1484ce471c94e731b98f5e4a05b43e7188b16692998e1cc89fd1159a5"}, - {file = "xxhash-3.3.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ed04c47dfaab98fcda0b748af9ee6fe8c888a0a0fbd13720e0f0221671e387e1"}, - {file = "xxhash-3.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cbfde62516435ca198220aff048a8793383cb7047c7b88714a061968bca786d"}, - {file = "xxhash-3.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73682225faa973ee56743f0fcd36bfcbfec503be258e0e420fb34313f52f1e7b"}, - {file = "xxhash-3.3.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d49efdce2086c2c506af20ed18a1115b40af7aad6d4ee27cb31d7c810585a3f2"}, - {file = "xxhash-3.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:546a0bb8e5a657cadf0da290b30ccd561cb89c256a5421ab8d5eb12eaf087349"}, - {file = "xxhash-3.3.0.tar.gz", hash = "sha256:c3f9e322b1ebeebd44e3d9d2d9b124e0c550c1ef41bd552afdcdd719516ee41a"}, -] -yarl = [ + +[package.dependencies] +colorama = {version = "*", optional = true, markers = "platform_system == \"Windows\" and extra == \"colors\""} +Pygments = {version = "*", optional = true, markers = "python_version >= \"3.5.0\" and extra == \"colors\""} +six = "*" + +[package.extras] +all = ["IPython", "IPython", "Pygments", "Pygments", "attrs", "codecov", "colorama", "debugpy", "debugpy", "debugpy", "debugpy", "debugpy", "ipykernel", "ipykernel", "ipython-genutils", "jedi", "jinja2", "jupyter-client", "jupyter-client", "jupyter-core", "nbconvert", "pyflakes", "pytest", "pytest", "pytest", "pytest-cov", "six", "tomli", "typing"] +all-strict = ["IPython (==7.10.0)", "IPython (==7.23.1)", "Pygments (==2.0.0)", "Pygments (==2.4.1)", "attrs (==19.2.0)", "codecov (==2.0.15)", "colorama (==0.4.1)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.3.0)", "debugpy (==1.6.0)", "ipykernel (==5.2.0)", "ipykernel (==6.0.0)", "ipython-genutils (==0.2.0)", "jedi (==0.16)", "jinja2 (==3.0.0)", "jupyter-client (==6.1.5)", "jupyter-client (==7.0.0)", "jupyter-core (==4.7.0)", "nbconvert (==6.0.0)", "pyflakes (==2.2.0)", "pytest (==4.6.0)", "pytest (==4.6.0)", "pytest (==6.2.5)", "pytest-cov (==3.0.0)", "six (==1.11.0)", "tomli (==0.2.0)", "typing (==3.7.4)"] +colors = ["Pygments", "Pygments", "colorama"] +jupyter = ["IPython", "IPython", "attrs", "debugpy", "debugpy", "debugpy", "debugpy", "debugpy", "ipykernel", "ipykernel", "ipython-genutils", "jedi", "jinja2", "jupyter-client", "jupyter-client", "jupyter-core", "nbconvert"] +optional = ["IPython", "IPython", "Pygments", "Pygments", "attrs", "colorama", "debugpy", "debugpy", "debugpy", "debugpy", "debugpy", "ipykernel", "ipykernel", "ipython-genutils", "jedi", "jinja2", "jupyter-client", "jupyter-client", "jupyter-core", "nbconvert", "pyflakes", "tomli"] +optional-strict = ["IPython (==7.10.0)", "IPython (==7.23.1)", "Pygments (==2.0.0)", "Pygments (==2.4.1)", "attrs (==19.2.0)", "colorama (==0.4.1)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.3.0)", "debugpy (==1.6.0)", "ipykernel (==5.2.0)", "ipykernel (==6.0.0)", "ipython-genutils (==0.2.0)", "jedi (==0.16)", "jinja2 (==3.0.0)", "jupyter-client (==6.1.5)", "jupyter-client (==7.0.0)", "jupyter-core (==4.7.0)", "nbconvert (==6.0.0)", "pyflakes (==2.2.0)", "tomli (==0.2.0)"] 
+runtime-strict = ["six (==1.11.0)"] +tests = ["codecov", "pytest", "pytest", "pytest", "pytest-cov", "typing"] +tests-binary = ["cmake", "cmake", "ninja", "ninja", "pybind11", "pybind11", "scikit-build", "scikit-build"] +tests-binary-strict = ["cmake (==3.21.2)", "cmake (==3.25.0)", "ninja (==1.10.2)", "ninja (==1.11.1)", "pybind11 (==2.10.3)", "pybind11 (==2.7.1)", "scikit-build (==0.11.1)", "scikit-build (==0.16.1)"] +tests-strict = ["codecov (==2.0.15)", "pytest (==4.6.0)", "pytest (==4.6.0)", "pytest (==6.2.5)", "pytest-cov (==3.0.0)", "typing (==3.7.4)"] + +[[package]] +name = "yarl" +version = "1.9.2" +description = "Yet another URL library" +optional = false +python-versions = ">=3.7" +files = [ {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, {file = "yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0c77533b5ed4bcc38e943178ccae29b9bcf48ffd1063f5821192f23a1bd27b9"}, @@ -3259,7 +2988,27 @@ yarl = [ {file = "yarl-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:61016e7d582bc46a5378ffdd02cd0314fb8ba52f40f9cf4d9a5e7dbef88dee18"}, {file = "yarl-1.9.2.tar.gz", hash = "sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571"}, ] -zipp = [ + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zipp" +version = "3.16.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, ] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + +[metadata] +lock-version = "2.0" +python-versions = "^3.9" +content-hash = "3da71ba294a8a62922d36dea10598728195f1ffd1b49b0fee8da52072380e0b6" diff --git a/pyproject.toml b/pyproject.toml index f4c45b89..4d153bdc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,12 +33,12 @@ python = "^3.9" torch = ">=1.10" pytorch-lightning = "^2" torchmetrics = "^1" -datasets = "^2.13" transformers = "^4.18" # pin to version below 2023.9.0 because that causes problems when using load_dataset with local files (e.g. 
json) fsspec = "<2023.9.0" # required for testing, but, strangely, tests fail, if just in dev-dependencies absl-py = "^1.0.0" +pandas = "^2.0.0" [tool.poetry.dev-dependencies] Pygments = ">=2.10.0" diff --git a/src/pytorch_ie/__init__.py b/src/pytorch_ie/__init__.py index 29badd60..d7146b10 100644 --- a/src/pytorch_ie/__init__.py +++ b/src/pytorch_ie/__init__.py @@ -1,7 +1,6 @@ # flake8: noqa from pytorch_ie.auto import AutoModel, AutoPipeline, AutoTaskModule -from pytorch_ie.data import * from pytorch_ie.models import * from pytorch_ie.pipeline import Pipeline from pytorch_ie.taskmodules import * diff --git a/src/pytorch_ie/core/module_mixins.py b/src/pytorch_ie/core/module_mixins.py index 4d151bc8..be6f51ab 100644 --- a/src/pytorch_ie/core/module_mixins.py +++ b/src/pytorch_ie/core/module_mixins.py @@ -2,7 +2,6 @@ from typing import Optional, Type from pytorch_ie.core.document import Document -from pytorch_ie.data.dataset_dict import DatasetDict logger = logging.getLogger(__name__) @@ -15,7 +14,7 @@ class RequiresDocumentTypeMixin: def document_type(self) -> Optional[Type[Document]]: return self.DOCUMENT_TYPE - def convert_dataset(self, dataset: DatasetDict) -> DatasetDict: + def convert_dataset(self, dataset: "pie_datasets.DatasetDict") -> "pie_datasets.DatasetDict": # type: ignore name = type(self).__name__ # auto-convert the dataset if a document type is specified if self.document_type is not None: diff --git a/src/pytorch_ie/data/__init__.py b/src/pytorch_ie/data/__init__.py deleted file mode 100644 index efdc764c..00000000 --- a/src/pytorch_ie/data/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from .builder import GeneratorBasedBuilder -from .dataset import Dataset, IterableDataset -from .dataset_dict import DatasetDict -from .dataset_formatter import DocumentFormatter -from .document_conversion import ( - text_based_document_to_token_based, - token_based_document_to_text_based, - tokenize_document, -) - -__all__ = [ - "GeneratorBasedBuilder", - "Dataset", - "IterableDataset", - "DatasetDict", - "DocumentFormatter", - "text_based_document_to_token_based", - "token_based_document_to_text_based", - "tokenize_document", -] diff --git a/src/pytorch_ie/data/builder.py b/src/pytorch_ie/data/builder.py deleted file mode 100644 index 635826f9..00000000 --- a/src/pytorch_ie/data/builder.py +++ /dev/null @@ -1,256 +0,0 @@ -import abc -from typing import Any, Callable, Dict, List, Optional, Sequence, Type, TypeVar, Union, overload - -import datasets as hf_datasets - -from pytorch_ie.core.document import Document -from pytorch_ie.data.dataset import ( - Dataset, - DocumentConvertersType, - IterableDataset, - decorate_convert_to_dict_of_lists, - get_pie_dataset_type, -) -from pytorch_ie.utils.hydra import resolve_target - - -def get_general_dataset_builder_parent_class( - obj: hf_datasets.builder.DatasetBuilder, -) -> Type[hf_datasets.builder.DatasetBuilder]: - general_dataset_builder_parent_classes = [ - cls - for cls in hf_datasets.builder.DatasetBuilder.__subclasses__() - if cls != PieDatasetBuilder and isinstance(obj, cls) - ] - if len(general_dataset_builder_parent_classes) != 1: - raise TypeError("can not determine general dataset builder parent class of the object") - return general_dataset_builder_parent_classes[0] - - -class PieDatasetBuilder(hf_datasets.builder.DatasetBuilder): - # The default pytorch-ie document type for the dataset. - DOCUMENT_TYPE: Optional[Type[Document]] = None - # A mapping from config names to PIE document types. 
Use this to specify individual - # document types per config. - DOCUMENT_TYPES: Dict[str, Type[Document]] = {} - - # The default path to the Huggingface dataset loading script that will be used as base dataset. - BASE_DATASET_PATH: Optional[str] = None - # A mapping from config names to Huggingface dataset loading script paths. Use this to specify individual - # base datasets for each config. - BASE_DATASET_PATHS: Dict[str, str] = {} - - # Define kwargs to create base configs. This should contain config names as keys - # and the respective config kwargs dicts as values. If the config name is not contained, a new entry - # {"name": config_name} will be created for it, i.e. the config name is passed as base config name. - # This default behaviour can be disabled by setting BASE_CONFIG_KWARGS_DICT to None. - BASE_CONFIG_KWARGS_DICT: Optional[Dict[Optional[str], Dict[str, Any]]] = {} - # Define base builder kwargs. This should contain config names as keys and the respective - # builder kwargs dicts as values. - BASE_BUILDER_KWARGS_DICT: Optional[Dict[Optional[str], Dict[str, Any]]] = None - - # Define document converters. This should be a mapping from document types as keys to the respective - # document converters as values. The document converters can be either callables or dicts - # that map from original field names to new field names. If a callable is provided, it will be used to - # convert the document. If a dict is provided, it will be used to rename the fields of the - # document (this is done by renaming the columns which is much more efficient). - DOCUMENT_CONVERTERS: DocumentConvertersType = {} - - def __init__( - self, - base_dataset_kwargs: Optional[Dict[str, Any]] = None, - document_converters: Optional[ - Dict[Union[Type[Document], str], Union[Callable[..., Document], Dict[str, str], str]] - ] = None, - **kwargs, - ): - self.base_builder = None - config_name = kwargs.get("config_name", None) - base_dataset_path = self.BASE_DATASET_PATHS.get(config_name, self.BASE_DATASET_PATH) - if base_dataset_path is not None: - base_dataset_kwargs = base_dataset_kwargs or {} - base_builder_kwargs: Dict[str, Any] = {} - - # get base config kwargs from mapping - if self.BASE_CONFIG_KWARGS_DICT is not None: - if config_name in self.BASE_CONFIG_KWARGS_DICT: - config_kwargs = self.BASE_CONFIG_KWARGS_DICT[config_name] - else: - # if the config name is not in BASE_CONFIG_KWARGS_DICT, - # we pass it as base config name - config_kwargs = {"name": config_name} - base_builder_kwargs.update(config_kwargs) - - # get base builder kwargs from mapping - if self.BASE_BUILDER_KWARGS_DICT is not None: - base_builder_kwargs.update(self.BASE_BUILDER_KWARGS_DICT[config_name]) - - base_builder_kwargs.update(base_dataset_kwargs) - self.base_builder = hf_datasets.load.load_dataset_builder( - path=base_dataset_path, - **base_builder_kwargs, - ) - # Ensure that self and self.base_builder are derived from the same subclass of - # hf_datasets.builder.DatasetBuilder. - base_builder_general_parent_class = get_general_dataset_builder_parent_class( - self.base_builder - ) - self_general_parent_class = get_general_dataset_builder_parent_class(self) - if base_builder_general_parent_class != self_general_parent_class: - raise TypeError( - f"The PyTorch-IE dataset builder class '{type(self).__name__}' is derived from " - f"{self_general_parent_class}, but the base builder is not which is not allowed. 
The base builder " - f"is of type '{type(self.base_builder).__name__}' that is derived from " - f"{base_builder_general_parent_class}. Consider to derive your PyTorch-IE dataset builder " - f"'{type(self).__name__}' from a PyTorch-IE variant of " - f"'{base_builder_general_parent_class.__name__}'." - ) - - # append the base_builder config_id to the hash, otherwise the base_builder config arguments - # are not respected in the cache fingerprint - if "hash" in kwargs: - kwargs["hash"] = f"{kwargs['hash']}-{self.base_builder.config_id}" - - # set base path to base builder base path. This is required so that the download manager - # works correctly with relative paths. - kwargs["base_path"] = self.base_builder.base_path - - super().__init__(**kwargs) - - self.document_converters = dict(self.DOCUMENT_CONVERTERS) - if document_converters is not None: - for document_type_or_str, document_converter_or_str in document_converters.items(): - document_type = resolve_target(document_type_or_str) - if isinstance(document_type, type) and issubclass(document_type, Document): - document_converter: Union[Callable[..., Any], dict[str, str]] - if isinstance(document_converter_or_str, str): - document_converter = resolve_target(document_converter_or_str) - else: - document_converter = document_converter_or_str - - self.document_converters[document_type] = document_converter - else: - raise TypeError( - f"The key '{document_type_or_str}' for one of the converters " - f"can not be resolved to a document type." - ) - - def _info(self): - return self.base_builder._info() - - def _split_generators(self, dl_manager): - return self.base_builder._split_generators(dl_manager) - - @property - def document_type(self) -> Optional[Type[Document]]: - return self.DOCUMENT_TYPES.get(self.config.name, self.DOCUMENT_TYPE) - - @abc.abstractmethod - def _generate_document(self, example, **kwargs): - pass - - def _generate_document_kwargs(self, dataset): - return None - - @overload # type: ignore - def _convert_dataset_single(self, dataset: hf_datasets.IterableDataset) -> IterableDataset: - ... - - @overload # type: ignore - def _convert_dataset_single(self, dataset: hf_datasets.Dataset) -> Dataset: - ... - - def _convert_dataset_single( - self, dataset: Union[hf_datasets.Dataset, hf_datasets.IterableDataset] - ) -> Union[Dataset, IterableDataset]: - document_type = self.document_type - if document_type is None: - raise TypeError( - f"the builder has no DOCUMENT_TYPE or DOCUMENT_TYPES[{self.config.name}] defined" - ) - - fn = decorate_convert_to_dict_of_lists(self._generate_document) - fn_kwargs = self._generate_document_kwargs(dataset) - mapped_dataset = dataset.map(fn, fn_kwargs=fn_kwargs) - dataset_type = get_pie_dataset_type(mapped_dataset) - result = dataset_type.from_hf_dataset( - dataset=mapped_dataset, - document_type=document_type, - document_converters=dict(self.document_converters), - ) - return result - - @overload # type: ignore - def _convert_datasets(self, datasets: hf_datasets.DatasetDict) -> hf_datasets.DatasetDict: - ... - - @overload # type: ignore - def _convert_datasets( - self, datasets: hf_datasets.IterableDatasetDict - ) -> hf_datasets.IterableDatasetDict: - ... - - @overload # type: ignore - def _convert_datasets(self, datasets: hf_datasets.IterableDataset) -> IterableDataset: - ... - - @overload # type: ignore - def _convert_datasets(self, datasets: hf_datasets.Dataset) -> Dataset: - ... 
- - def _convert_datasets( - self, - datasets: Union[ - hf_datasets.Dataset, - hf_datasets.IterableDataset, - hf_datasets.DatasetDict, - hf_datasets.IterableDatasetDict, - ], - ) -> Union[Dataset, IterableDataset, hf_datasets.DatasetDict, hf_datasets.IterableDatasetDict]: - if isinstance(datasets, dict): - return type(datasets)( - {k: self._convert_dataset_single(v) for k, v in datasets.items()} - ) - else: - return self._convert_dataset_single(datasets) - - def as_dataset( - self, - split: Optional[hf_datasets.Split] = None, - run_post_process=True, - verification_mode: Optional[Union[hf_datasets.VerificationMode, str]] = None, - ignore_verifications="deprecated", - in_memory=False, - ) -> Union[Dataset, hf_datasets.DatasetDict]: - datasets = super().as_dataset( - split=split, - run_post_process=run_post_process, - ignore_verifications=ignore_verifications, - in_memory=in_memory, - verification_mode=verification_mode, - ) - converted_datasets = self._convert_datasets(datasets=datasets) - return converted_datasets - - def as_streaming_dataset( - self, - split: Optional[str] = None, - base_path: Optional[str] = None, - ) -> Union[IterableDataset, hf_datasets.IterableDatasetDict]: # type: ignore - datasets: Union[ - hf_datasets.IterableDataset, hf_datasets.IterableDatasetDict - ] = super().as_streaming_dataset( - split=split, base_path=base_path - ) # type: ignore - converted_datasets = self._convert_datasets(datasets=datasets) - return converted_datasets - - -class GeneratorBasedBuilder(PieDatasetBuilder, hf_datasets.builder.GeneratorBasedBuilder): - def _generate_examples(self, *args, **kwargs): - return self.base_builder._generate_examples(*args, **kwargs) - - -class ArrowBasedBuilder(PieDatasetBuilder, hf_datasets.builder.ArrowBasedBuilder): - def _generate_tables(self, *args, **kwargs): - return self.base_builder._generate_tables(*args, **kwargs) diff --git a/src/pytorch_ie/data/common.py b/src/pytorch_ie/data/common.py deleted file mode 100644 index e3213b2e..00000000 --- a/src/pytorch_ie/data/common.py +++ /dev/null @@ -1,40 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Optional, Union - -from .dataset import Dataset, IterableDataset - - -class EnterDatasetMixin(ABC): - """Mixin for processors that enter a dataset context.""" - - @abstractmethod - def enter_dataset( - self, dataset: Union[Dataset, IterableDataset], name: Optional[str] = None - ) -> None: - """Enter dataset context.""" - - -class ExitDatasetMixin(ABC): - """Mixin for processors that exit a dataset context.""" - - @abstractmethod - def exit_dataset( - self, dataset: Union[Dataset, IterableDataset], name: Optional[str] = None - ) -> None: - """Exit dataset context.""" - - -class EnterDatasetDictMixin(ABC): - """Mixin for processors that enter a dataset dict context.""" - - @abstractmethod - def enter_dataset_dict(self, dataset_dict) -> None: - """Enter dataset dict context.""" - - -class ExitDatasetDictMixin(ABC): - """Mixin for processors that exit a dataset dict context.""" - - @abstractmethod - def exit_dataset_dict(self, dataset_dict) -> None: - """Exit dataset dict context.""" diff --git a/src/pytorch_ie/data/dataset.py b/src/pytorch_ie/data/dataset.py deleted file mode 100644 index 86a17514..00000000 --- a/src/pytorch_ie/data/dataset.py +++ /dev/null @@ -1,585 +0,0 @@ -import logging -from collections.abc import Iterable, Sequence -from functools import wraps -from inspect import Signature, isclass, signature -from typing import Callable, Dict, List, Optional, Set, Tuple, Type, TypeVar, Union, 
overload - -import datasets -import pandas as pd -from datasets.formatting import _register_formatter - -from pytorch_ie.core.document import Document -from pytorch_ie.data.dataset_formatter import DocumentFormatter - -logger = logging.getLogger(__name__) - -_register_formatter(DocumentFormatter, "document") - - -def decorate_convert_to_dict_of_lists(f): - """ - Decorate the mapped function, so that converts a single Document to a dict, - and a list of Documents into a dict of lists. - """ - - @wraps(f) - def decorated(item, *args, **kwargs): - if isinstance(item, list): - # Convert a list of dicts into a dict of lists. - return pd.DataFrame([e.asdict() for e in f(item, *args, **kwargs)]).to_dict( - orient="list" - ) - else: - return f(item, *args, **kwargs).asdict() - - return decorated - - -E = TypeVar("E") - - -def dl_to_ld(dict_list: Dict[str, List[E]]) -> List[Dict[str, E]]: - # Convert a dict of lists to a list of dicts - return [dict(zip(dict_list, t)) for t in zip(*dict_list.values())] - - -def ld_to_dl( - list_dict: List[Dict[str, E]], keys: Optional[Iterable[str]] = None -) -> Dict[str, List[E]]: - # Convert a list of dicts to a dict of lists. - # Provide keys to create the expected format when lists are empty. - if keys is None: - keys = list_dict[0] - return {k: [dic[k] for dic in list_dict] for k in keys} - - -def decorate_convert_to_document_and_back(f, document_type: Type[Document], batched: bool): - @wraps(f) - def decorated(item, *args, **kwargs): - if batched: - # Convert a list of dicts into a dict of lists. - return ld_to_dl( - [ - e.asdict() - for e in f( - [document_type.fromdict(x) for x in dl_to_ld(item)], *args, **kwargs - ) - ], - # passing the keys allows to work correctly with empty lists - keys=item.keys(), - ) - else: - return f(document_type.fromdict(item), *args, **kwargs).asdict() - - return decorated - - -def _check_fields_for_casting( - field_mapping: Dict[str, str], - current_document_type: Type[Document], - new_document_type: Type[Document], - column_names: list[str], -) -> Tuple[Set[str], Set[str]]: - original_fields = {field.name: field for field in current_document_type.fields()} - new_fields = {field.name: field for field in new_document_type.fields()} - hidden_fields = set(column_names) - set(original_fields) - fields_to_map_not_in_original_fields = ( - set(field_mapping) - set(original_fields) - set(hidden_fields) - ) - if len(fields_to_map_not_in_original_fields) > 0: - raise ValueError( - f"some fields to rename are not in the original document_type or hidden fields: " - f"{fields_to_map_not_in_original_fields}" - ) - mapped_but_not_in_new_fields = set(field_mapping.values()) - set(new_fields) - if len(mapped_but_not_in_new_fields) > 0: - raise ValueError( - f"some renamed fields are not in the new document_type: {mapped_but_not_in_new_fields}" - ) - original_fields_mapped = { - field_mapping.get(f_name, f_name): f for f_name, f in original_fields.items() - } - added_field_names = set(new_fields) - set(original_fields_mapped) - removed_field_names = set(original_fields) - set(new_fields) - set(field_mapping) - - # Sanity checks - kept_field_names = set(original_fields_mapped) & set(new_fields) - for f_name_mapped in kept_field_names: - f = original_fields_mapped[f_name_mapped] - new_f = new_fields[f_name_mapped] - if not ( - f.type == new_f.type - and f.default == new_f.default - and f.default_factory == new_f.default_factory - ): - raise ValueError(f"new field is not the same as old field:\n{new_f}\nvs\n{f}") - - return removed_field_names, 
added_field_names - - -def _infer_document_type_from_function_return( - function: Callable, strict: bool = True -) -> Optional[Type[Document]]: - # try to infer the document type from the return type annotation of function - return_signature = signature(function).return_annotation - if not return_signature == Signature.empty: - if not isclass(return_signature) or not issubclass(return_signature, Document): - if strict: - raise TypeError( - f"the return type annotation of the function used with map is not a subclass of Document" - ) - else: - logger.warning( - f"the return type annotation of the function used with map is not a subclass of Document" - ) - return None - return return_signature - return None - - -D = TypeVar("D", bound=Document) -DocumentConvertersType = Dict[Type[D], Union[Callable[..., D], Dict[str, str]]] - - -def _get_best_dataset_converter_with_types( - dataset: Union["IterableDataset", "Dataset"], - document_type: Union[Type[Document]], -) -> Tuple[Union[Callable[..., Document], Dict[str, str]], Type[Document], Type[Document]]: - # first try to find an exact match - if document_type in dataset.document_converters: - return dataset.document_converters[document_type], document_type, document_type - - # then try to find a match with a superclass - for registered_dt, candidate_converter in dataset.document_converters.items(): - if issubclass(registered_dt, document_type): - return candidate_converter, document_type, registered_dt - - # then try to find a match with a subclass - for registered_dt, candidate_converter in dataset.document_converters.items(): - if issubclass(document_type, registered_dt): - return candidate_converter, document_type, registered_dt - - raise ValueError( - f"No valid key (either subclass or superclass) was found for the document type '{document_type}' " - f"in the document_converters of the dataset. Available keys: {set(dataset.document_converters)}. " - f"Consider adding a respective converter to the dataset with " - f"dataset.register_document_converter(my_converter_method) where my_converter_method should accept " - f"{dataset.document_type} as input and return '{document_type}'." - ) - - -@overload -def dataset_to_document_type( - dataset: "Dataset", - document_type: Type[Document], - **kwargs, -) -> "Dataset": - ... - - -@overload -def dataset_to_document_type( - dataset: "IterableDataset", - document_type: Type[Document], - **kwargs, -) -> "IterableDataset": - ... 
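For context on the conversion helpers above (and the register_document_converter / to_document_type methods defined just below): a converter can either be a callable whose return annotation identifies the target document type, or a plain field-name mapping. The document classes and the converter in this sketch are placeholders; the annotation imports follow the usual pytorch_ie pattern and are an assumption of this aside, not part of the diff.

import dataclasses

from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextBasedDocument


@dataclasses.dataclass
class DocWithEntities(TextBasedDocument):
    entities: AnnotationList[LabeledSpan] = annotation_field(target="text")


@dataclasses.dataclass
class DocWithSpans(TextBasedDocument):
    spans: AnnotationList[LabeledSpan] = annotation_field(target="text")


def entities_to_spans(doc: DocWithEntities) -> DocWithSpans:
    # the return annotation is what lets the document type be inferred on registration
    result = DocWithSpans(text=doc.text)
    for entity in doc.entities:
        result.spans.append(entity.copy())
    return result


# given `dataset`, a PIE Dataset whose documents are DocWithEntities instances:
#   dataset.register_document_converter(entities_to_spans)
#   converted = dataset.to_document_type(DocWithSpans)
# a field mapping works as well, e.g.:
#   dataset.register_document_converter({"entities": "spans"}, document_type=DocWithSpans)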
- - -def dataset_to_document_type( - dataset: Union["IterableDataset", "Dataset"], - document_type: Type[Document], - **kwargs, -) -> Union["IterableDataset", "Dataset"]: - - # do nothing if the document type is already the requested type - if document_type == dataset.document_type: - logger.info(f"The dataset has already the requested document type {document_type}.") - return dataset - - converter, requested_type, registered_type = _get_best_dataset_converter_with_types( - dataset=dataset, - document_type=document_type, - ) - - result = dataset - if callable(converter): - result = result.map( - function=converter, - result_document_type=registered_type, - fn_kwargs=kwargs, - ) - else: - result = result.cast_document_type( - new_document_type=registered_type, field_mapping=converter, **kwargs - ) - # if the type is not the same or a subclass of the requested type, try to cast (again) - if not issubclass(registered_type, requested_type): - result = result.cast_document_type(new_document_type=requested_type) - - # remove the document converters because they are not valid anymore - result.document_converters = {} - - return result - - -def dataset_register_document_converter( - dataset: Union["Dataset", "IterableDataset"], - converter: Union[Callable[..., D], Dict[str, str]], - document_type: Optional[Type[D]] = None, -) -> None: - if callable(converter) and document_type is None: - dt = _infer_document_type_from_function_return(converter) - else: - dt = document_type - if not (isinstance(dt, type) and issubclass(dt, Document)): - raise TypeError( - f"the (inferred) document_type {dt} is not a subclass of Document. " - "Please provide a document_type or a converter with a return type annotation." - ) - dataset.document_converters[dt] = converter - - -class Dataset(datasets.Dataset, Sequence[D]): - def __init__( - self, - document_type: Type[D], - arrow_table: datasets.table.Table, - info: Optional[datasets.DatasetInfo] = None, - split: Optional[datasets.NamedSplit] = None, - indices_table: Optional[datasets.table.Table] = None, - fingerprint: Optional[str] = None, - document_converters: Optional[DocumentConvertersType] = None, - ): - super().__init__( - arrow_table=arrow_table, - info=info, - split=split, - indices_table=indices_table, - fingerprint=fingerprint, - ) - - self.document_type = document_type - self.set_format("document", document_type=document_type) - self.document_converters = document_converters or {} - - @classmethod - def get_base_kwargs(cls, dataset: datasets.Dataset): - return dict( - arrow_table=dataset._data, - info=dataset.info, - split=dataset.split, - indices_table=dataset._indices, - fingerprint=dataset._fingerprint, - ) - - @classmethod - def from_hf_dataset( - cls, - dataset: datasets.Dataset, - document_type: Type[D], - document_converters: Optional[DocumentConvertersType] = None, - ) -> "Dataset": - document_dataset = cls( - document_type=document_type, - document_converters=document_converters, - **cls.get_base_kwargs(dataset), - ) - return document_dataset - - def apply_hf_func(self, func, **kwargs) -> "Dataset": - return Dataset.from_hf_dataset( - func(self, **kwargs), - document_type=self.document_type, - document_converters=self.document_converters, - ) - - def register_document_converter( - self, - converter: Union[Callable[..., D], Dict[str, str]], - document_type: Optional[Type[D]] = None, - ) -> None: - dataset_register_document_converter( - dataset=self, - converter=converter, - document_type=document_type, - ) - - def to_document_type( - self, - 
document_type: Type[Document], - **kwargs, - ) -> "Dataset": - return dataset_to_document_type( - dataset=self, - document_type=document_type, - **kwargs, - ) - - def map( - self, - function: Optional[Callable] = None, - with_indices: bool = False, - with_rank: bool = False, - input_columns: Optional[Union[str, List[str]]] = None, - batched: bool = False, - batch_size: Optional[int] = 1000, - drop_last_batch: bool = False, - remove_columns: Optional[Union[str, List[str]]] = None, - keep_in_memory: bool = False, - load_from_cache_file: Optional[bool] = None, - cache_file_name: Optional[str] = None, - writer_batch_size: Optional[int] = 1000, - features: Optional[datasets.Features] = None, - disable_nullable: bool = False, - fn_kwargs: Optional[dict] = None, - num_proc: Optional[int] = None, - suffix_template: str = "_{rank:05d}_of_{num_proc:05d}", - new_fingerprint: Optional[str] = None, - desc: Optional[str] = None, - as_documents: bool = True, - result_document_type: Optional[Type[Document]] = None, - ) -> "Dataset": - dataset = super().map( - function=decorate_convert_to_dict_of_lists(function) if as_documents else function, - with_indices=with_indices, - with_rank=with_rank, - input_columns=input_columns, - batched=batched, - batch_size=batch_size, - drop_last_batch=drop_last_batch, - remove_columns=remove_columns, - keep_in_memory=keep_in_memory, - # ignore typing because typing in Huggingface Dataset.map() is incorrect - load_from_cache_file=load_from_cache_file, # type: ignore - cache_file_name=cache_file_name, - writer_batch_size=writer_batch_size, - features=features, - disable_nullable=disable_nullable, - fn_kwargs=fn_kwargs, - num_proc=num_proc, - suffix_template=suffix_template, - new_fingerprint=new_fingerprint, - desc=desc, - ) - - if result_document_type is None: - result_document_type = self.document_type - - return Dataset.from_hf_dataset( - dataset, - document_type=result_document_type, - document_converters=self.document_converters, - ) - - def cast_document_type( - self, - new_document_type: Type[D], - remove_columns: bool = False, - field_mapping: Optional[Dict[str, str]] = None, - ) -> "Dataset": - field_mapping = field_mapping or {} - - removed_field_names, added_field_names = _check_fields_for_casting( - field_mapping=field_mapping, - current_document_type=self.document_type, - new_document_type=new_document_type, - column_names=self.column_names, - ) - - new_hf_dataset = datasets.Dataset(**self.get_base_kwargs(self)) - - if remove_columns: - new_hf_dataset = new_hf_dataset.remove_columns(list(removed_field_names)) - - rename_targets_already_in_columns = ( - set(field_mapping.values()) - set(field_mapping) - ) & set(new_hf_dataset.column_names) - if len(rename_targets_already_in_columns) > 0: - raise ValueError( - f"rename targets are already in column names: {rename_targets_already_in_columns}. Did you miss " - f"to set remove_columns=True in a previous call of cast_document_type?" 
- ) - - new_hf_dataset = new_hf_dataset.rename_columns(field_mapping) - for f_name in added_field_names: - if f_name not in new_hf_dataset.column_names: - # add empty columns - new_hf_dataset = new_hf_dataset.add_column( - name=f_name, column=len(new_hf_dataset) * [{}] - ) - new_dataset = Dataset.from_hf_dataset( - new_hf_dataset, - document_type=new_document_type, - document_converters=self.document_converters, - ) - - return new_dataset - - -class IterableDataset(datasets.IterableDataset): - def __init__( - self, - document_type: Type[Document], - hidden_columns: Optional[Set[str]] = None, - document_converters: Optional[DocumentConvertersType] = None, - **kwargs, - ): - super().__init__(**kwargs) - self.document_type = document_type - self._document_field_names = [field.name for field in document_type.fields()] - self.hidden_columns = set() - if hidden_columns is not None: - self.hidden_columns.update(hidden_columns) - self.document_converters = document_converters or {} - - @property - def column_names(self) -> List[str]: - return self._document_field_names + list(self.hidden_columns) - - @classmethod - def get_base_kwargs(cls, dataset: datasets.IterableDataset): - return dict( - ex_iterable=dataset._ex_iterable, - info=dataset.info, - split=dataset.split, - formatting=dataset._formatting, - shuffling=dataset._shuffling, - distributed=dataset._distributed, - token_per_repo_id=dataset._token_per_repo_id, - ) - - @classmethod - def from_hf_dataset( - cls, - dataset: datasets.IterableDataset, - document_type: Type[Document], - hidden_columns: Optional[Set[str]] = None, - document_converters: Optional[DocumentConvertersType] = None, - ) -> "IterableDataset": - dataset = cls( - document_type=document_type, - hidden_columns=hidden_columns, - document_converters=document_converters, - **cls.get_base_kwargs(dataset), - ) - return dataset - - def __iter__(self): - for example in iter(super().__iter__()): - yield self.document_type.fromdict(example) - - def register_document_converter( - self, - converter: Union[Callable[..., D], Dict[str, str]], - document_type: Optional[Type[D]] = None, - ) -> None: - dataset_register_document_converter( - dataset=self, - converter=converter, - document_type=document_type, - ) - - def to_document_type( - self, - document_type: Type[Document], - **kwargs, - ) -> "IterableDataset": - return dataset_to_document_type( - dataset=self, - document_type=document_type, - **kwargs, - ) - - def map( # type: ignore - self, - function: Optional[Callable] = None, - batched: bool = False, - as_documents: bool = True, - result_document_type: Optional[Type[Document]] = None, - **kwargs, - ) -> "IterableDataset": - dataset_mapped = super().map( - function=decorate_convert_to_document_and_back( - function, document_type=self.document_type, batched=batched - ) - if as_documents - else function, - batched=batched, - **kwargs, - ) - - if result_document_type is None: - result_document_type = self.document_type - - return IterableDataset.from_hf_dataset( - dataset_mapped, - document_type=result_document_type, - document_converters=self.document_converters, - ) - - def apply_hf_func(self, func, **kwargs) -> "IterableDataset": - return IterableDataset.from_hf_dataset( - func(self, **kwargs), - document_type=self.document_type, - hidden_columns=self.hidden_columns, - document_converters=self.document_converters, - ) - - def cast_document_type( - self, - new_document_type: Type[D], - remove_columns: bool = False, - field_mapping: Optional[Dict[str, str]] = None, - ) -> 
"IterableDataset": - field_mapping = field_mapping or {} - - removed_field_names, added_field_names = _check_fields_for_casting( - field_mapping=field_mapping, - current_document_type=self.document_type, - new_document_type=new_document_type, - column_names=self.column_names, - ) - hidden_columns = set(self.hidden_columns) - new_hf_dataset = datasets.IterableDataset(**self.get_base_kwargs(self)) - - if remove_columns: - new_hf_dataset = new_hf_dataset.remove_columns(column_names=list(removed_field_names)) - else: - hidden_columns.update(removed_field_names) - - rename_targets_already_in_columns = ( - set(field_mapping.values()) - set(field_mapping) - ) & hidden_columns - if len(rename_targets_already_in_columns) > 0: - raise ValueError( - f"rename targets are already in column names: {rename_targets_already_in_columns}. Did you " - f"miss to set remove_columns=True in a previous call of cast_document_type?" - ) - - new_hf_dataset = new_hf_dataset.rename_columns(column_mapping=field_mapping) - - new_dataset = IterableDataset.from_hf_dataset( - new_hf_dataset, - hidden_columns=hidden_columns, - document_type=new_document_type, - document_converters=self.document_converters, - ) - - return new_dataset - - def take(self, n) -> "IterableDataset": - return self.apply_hf_func(datasets.IterableDataset.take, n=n) - - -def get_pie_dataset_type( - hf_dataset: Union[datasets.Dataset, datasets.IterableDataset] -) -> Union[Type[Dataset], Type[IterableDataset]]: - if isinstance(hf_dataset, datasets.Dataset): - return Dataset - elif isinstance(hf_dataset, datasets.IterableDataset): - return IterableDataset - else: - raise TypeError( - f"the dataset must be of type Dataset or IterableDataset, but is of type {type(hf_dataset)}" - ) diff --git a/src/pytorch_ie/data/dataset_dict.py b/src/pytorch_ie/data/dataset_dict.py deleted file mode 100644 index e89ee32a..00000000 --- a/src/pytorch_ie/data/dataset_dict.py +++ /dev/null @@ -1,628 +0,0 @@ -import json -import logging -import os -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, SupportsIndex, Type, TypeVar, Union - -import datasets - -from pytorch_ie.core import Document -from pytorch_ie.data.dataset import Dataset, IterableDataset, get_pie_dataset_type -from pytorch_ie.utils.hydra import resolve_target, serialize_document_type - -from .common import ( - EnterDatasetDictMixin, - EnterDatasetMixin, - ExitDatasetDictMixin, - ExitDatasetMixin, -) - -logger = logging.getLogger(__name__) - -METADATA_FILE_NAME = "metadata.json" - - -D = TypeVar("D", bound=Document) - - -class DatasetDict(datasets.DatasetDict): - def __getitem__(self, k) -> Union[Dataset, IterableDataset]: # type: ignore - """returns an individual dataset split""" - - dataset = super().__getitem__(k) - if isinstance(dataset, (Dataset, IterableDataset)): - return dataset - else: - raise TypeError(f"dataset must be of type Dataset, but is {type(dataset)}") - - @classmethod - def load_dataset(cls, *args, split=None, **kwargs) -> "DatasetDict": - dataset_or_dataset_dict = datasets.load_dataset(*args, split=split, **kwargs) - if isinstance(dataset_or_dataset_dict, (Dataset, IterableDataset)): - if split is None: - raise ValueError( - f"split must be provided if the loaded dataset is not a (Iterable)DatasetDict, " - f"but is {type(dataset_or_dataset_dict)}" - ) - return cls({split: dataset_or_dataset_dict}) - elif isinstance( - dataset_or_dataset_dict, (datasets.DatasetDict, datasets.IterableDatasetDict) - ): - for dataset in dataset_or_dataset_dict.values(): - if not 
isinstance(dataset, (Dataset, IterableDataset)): - raise TypeError( - f"expected pytorch_ie.Dataset or pytorch_ie.IterableDataset, but got {type(dataset)}" - ) - return cls(dataset_or_dataset_dict) - else: - raise TypeError( - f"expected datasets.DatasetDict, datasets.IterableDatasetDict, pytorch_ie.Dataset, " - f"or pytorch_ie.IterableDataset, but got {type(dataset_or_dataset_dict)}" - ) - - @classmethod - def from_hf( - cls, - hf_dataset: Union[ - datasets.DatasetDict, - datasets.IterableDatasetDict, - Dict[str, datasets.Dataset], - Dict[str, datasets.IterableDataset], - ], - document_type: Union[str, Type[Document]], - ) -> "DatasetDict": - """Creates a PIE DatasetDict from a HuggingFace DatasetDict, or IterableDatasetDict. - If the input is a Dataset or IterableDataset, we create a DatasetDict with one split named "train". - - Args: - hf_dataset: HuggingFace (Iterable)Dataset(Dict) - document_type: document type of the dataset. Can be a subclass of Document or string that can be - resolved to such a type. - """ - - doc_type = resolve_target(document_type) - if not isinstance(doc_type, type) or not issubclass(doc_type, Document): - raise TypeError(f"document_type must be a subclass of Document, but is {doc_type}") - - res = cls( - { - k: get_pie_dataset_type(v).from_hf_dataset(v, document_type=doc_type) - for k, v in hf_dataset.items() - } - ) - return res - - @classmethod - def from_json( # type: ignore - cls, - document_type: Optional[Union[Type[Document], str]] = None, - metadata_path: Optional[Union[str, Path]] = None, - data_dir: Optional[str] = None, - split: Optional[str] = None, - **kwargs, - ) -> "DatasetDict": - """Creates a PIE DatasetDict from JSONLINE files. Uses `datasets.load_dataset("json")` under the hood. - Requires a document type to be provided. If the document type is not provided, we try to load it from the - metadata file. - - Args: - document_type: document type of the dataset - data_dir: Defining the `data_dir` of the dataset configuration. See datasets.load_dataset() for more - information. - metadata_path: path to the metadata file. Should point to a directory containing the metadata file - `metadata.json`. Defaults to the value of the `data_dir` parameter. - split: if provided, only the specified split is loaded. see `datasets.load_dataset()` for more information. - **kwargs: additional keyword arguments for `datasets.load_dataset()` - """ - - # try to load metadata - if metadata_path is None: - metadata_path = data_dir - if metadata_path is not None: - metadata_file_name = Path(metadata_path) / METADATA_FILE_NAME - if os.path.exists(metadata_file_name): - with open(metadata_file_name) as f: - metadata = json.load(f) - document_type = document_type or metadata.get("document_type", None) - - if document_type is None: - raise ValueError( - f"document_type must be provided if it cannot be loaded from the metadata file" - ) - - hf_dataset = datasets.load_dataset("json", data_dir=data_dir, split=split, **kwargs) - if isinstance(hf_dataset, (datasets.Dataset, datasets.IterableDataset)): - if split is None: - raise ValueError( - f"split must be provided if the loaded dataset is not a (Iterable)DatasetDict, " - f"but is {type(hf_dataset)}" - ) - hf_dataset = {split: hf_dataset} - return cls.from_hf(hf_dataset, document_type=document_type) - - def to_json(self, path: Union[str, Path], **kwargs) -> None: - """Serializes the DatasetDict. We convert all documents with `.asdict()` - and dump them with `json.dump()` to one JSONLINE file per split. 
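As a short illustration of the serialization round trip described here (to_json writes one JSONL file per split plus a metadata.json recording the document type, and from_json reads it back): the dataset name and output directory below are placeholders, and the import assumes this DatasetDict now ships with the external pie_datasets package.

from pie_datasets import DatasetDict  # assumed new home of the class removed in this file

dataset_dict = DatasetDict.load_dataset("pie/conll2003")      # any PIE dataset repo; the name is only an example
dataset_dict.to_json("serialized_docs")                       # one documents.jsonl per split + metadata.json
reloaded = DatasetDict.from_json(data_dir="serialized_docs")  # document type is recovered from metadata.json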
- - Args: - path: path to the output directory - **kwargs: additional keyword arguments for `json.dump()` - """ - - path = Path(path) - - # save the metadata - metadata = {"document_type": serialize_document_type(self.document_type)} - os.makedirs(path, exist_ok=True) - if os.path.exists(path / METADATA_FILE_NAME): - logger.warning( - f"metadata file '{path / METADATA_FILE_NAME}' already exists, overwriting it" - ) - with open(path / METADATA_FILE_NAME, "w") as f: - json.dump(metadata, f, indent=2) - - # save the splits - for split, dataset in self.items(): - split_path = path / split - logger.info(f'serialize documents to "{split_path}" ...') - os.makedirs(split_path, exist_ok=True) - file_name = split_path / "documents.jsonl" - with open(file_name, "w") as f: - for doc in dataset: - f.write(json.dumps(doc.asdict(), **kwargs) + "\n") - - @property - def document_type(self) -> Type[Document]: - """Returns the document type of the dataset splits. - - Raises an error if there are no splits in the dataset or if the dataset splits have different - document types. - """ - - if len(self) == 0: - raise ValueError("dataset does not contain any splits, cannot determine document type") - document_types = {ds.document_type for ds in self.values()} - if len(document_types) > 1: - raise ValueError( - f"dataset contains splits with different document types: {document_types}" - ) - return next(iter(document_types)) - - @property - def dataset_type(self) -> Union[Type[Dataset], Type[IterableDataset]]: - """Returns the dataset type of the dataset splits, i.e. either `Dataset` or `IterableDataset`. - - Raises an error if there are no splits in the dataset or if the dataset splits have different - dataset types. - """ - - if len(self) == 0: - raise ValueError( - "dataset does not contain any splits, cannot determine the dataset type" - ) - dataset_types = {type(ds) for ds in self.values()} - if len(dataset_types) > 1: - raise ValueError( - f"dataset contains splits with different dataset types: {dataset_types}" - ) - return next(iter(dataset_types)) - - def register_document_converter( - self, - converter: Union[Callable[..., D], Dict[str, str], str], - document_type: Optional[Union[Type[D], str]] = None, - ) -> "DatasetDict": - """Register a converter function or field mapping for a target document type. - - Args: - document_type: The target document type for which the converter should be registered. Can be a subclass - of Document or string that can be resolved to such a type. If `None`, the document type is tried to be - inferred from the converter function signature. - converter: Either a function that converts a document of the document type of this dataset to a document - of the target document_type, a string that can be resolved to such a function, or a field mapping - (dict[str, str]) that maps fields of the document type of this dataset to fields of the target - document_type. 
- """ - resolved_document_type: Optional[Union[Type[D], Callable]] = None - if document_type is not None: - if isinstance(document_type, str): - resolved_document_type = resolve_target(document_type) - else: - resolved_document_type = document_type - if not ( - isinstance(resolved_document_type, type) - and issubclass(resolved_document_type, Document) - ): - raise TypeError( - f"document_type must be or resolve to a subclass of Document, but is '{document_type}'" - ) - - resolved_converter: Union[Callable[..., Any], dict[str, str]] - if isinstance(converter, str): - resolved_converter = resolve_target(converter) - else: - resolved_converter = converter - if not (callable(resolved_converter) or isinstance(resolved_converter, dict)): - raise TypeError( - f"converter must be a callable or a dict, but is {type(resolved_converter)}" - ) - - for ds in self.values(): - ds.register_document_converter( - document_type=resolved_document_type, converter=resolved_converter - ) - return self - - def to_document_type( - self, - document_type: Union[Type[Document], str], - **kwargs, - ) -> "DatasetDict": - """Converts all documents in the dataset to a new document type using the best registered document converter. - - Args: - document_type: document type to convert the documents to. Can be a subclass of Document or string that - can be resolved to such a type. - """ - - if isinstance(document_type, str): - resolved_document_type = resolve_target(document_type) - else: - resolved_document_type = document_type - if not ( - isinstance(resolved_document_type, type) - and issubclass(resolved_document_type, Document) - ): - raise TypeError( - f"document_type must be a document type or a string that can be resolved to such a type, " - f"but got {document_type}." - ) - - if resolved_document_type == self.document_type: - logger.info(f"The dataset has already the requested document type {document_type}.") - return self - - result = type(self)( - { - name: ds.to_document_type(document_type=resolved_document_type, **kwargs) - for name, ds in self.items() - } - ) - return result - - def map( # type: ignore - self, - function: Optional[Union[Callable, str]] = None, - result_document_type: Optional[Union[str, Type[Document]]] = None, - **kwargs, - ) -> "DatasetDict": - """Applies a function to all documents in the dataset. - - If the function is an object and is derived from the following mixins, the respective logic - is applied: - - EnterDatasetMixin: `enter_dataset(dataset_split, split_name)` is called before the function is - applied to a dataset split - - ExitDatasetMixin: `exit_dataset(processed_dataset_split, split_name)` is called after the function - is applied to a dataset split - - EnterDatasetDictMixin: `enter_dataset_dict(dataset_dict)` is called before any dataset split is - processed (and before any `enter_dataset()` is called) - - ExitDatasetDictMixin: `exit_dataset_dict(processed_dataset_dict)` is called after all dataset splits - are processed (and after all `exit_dataset()` are called) - - Args: - function: function to apply to the documents. If `None`, the identity function is used. If `str`, - the function is resolved from the global namespace. - result_document_type: optional document type of the resulting dataset. Can be a subclass of Document or - string that can be resolved to such a type. If not provided, it is tried to infer it from the - function signature. If this is not possible, the document type of the input dataset - is used. 
- **kwargs: additional keyword arguments for `datasets.Dataset.map()` - """ - - if function is not None: - func = resolve_target(function) - if not callable(func): - raise TypeError(f"function must be callable, but is of type {type(func)}") - else: - - def identity(x): - # exclude from coverage because its usage happens in the map which is not collected - return x # pragma: no cover - - func = identity - map_kwargs = dict(function=func, **kwargs) - if result_document_type is not None: - map_kwargs["result_document_type"] = resolve_target(result_document_type) - - if isinstance(func, EnterDatasetDictMixin): - func.enter_dataset_dict(self) - - result_dict = {} - for split, dataset in self.items(): - if isinstance(func, EnterDatasetMixin): - func.enter_dataset(dataset=dataset, name=split) - result_dict[split] = dataset.map(**map_kwargs) - if isinstance(func, ExitDatasetMixin): - func.exit_dataset(dataset=result_dict[split], name=split) - - result = type(self)(result_dict) - - if isinstance(func, ExitDatasetDictMixin): - func.exit_dataset_dict(result) - - return result - - def select( - self, - split: str, - start: Optional[SupportsIndex] = None, - stop: Optional[SupportsIndex] = None, - step: Optional[SupportsIndex] = None, - **kwargs, - ) -> "DatasetDict": - """Reduce a certain dataset split to a selection of its documents. This is similar to the Huggingface - `select()`, but adds optional parameters `start`, `stop`, `step` that will be used to create indices, - if available. - - Args: - split: name of the dataset split to modify - start: optional start index of the selection - stop: optional stop index of the selection - step: optional step size of the selection - **kwargs: additional keyword arguments for `datasets.Dataset.select()` - """ - - if stop is not None: - range_args = [stop] - if start is not None: - range_args = [start] + range_args - if step is not None: - range_args = range_args + [step] - kwargs["indices"] = range(*range_args) - - if "indices" in kwargs: - result = type(self)(self) - pie_split = result[split] - if not isinstance(pie_split, Dataset): - raise TypeError( - f"can only select from a Dataset, but the split '{split}' is of type {type(pie_split)}" - ) - result[split] = Dataset.from_hf_dataset( - dataset=pie_split.select(**kwargs), document_type=pie_split.document_type - ) - return result - else: - if len(kwargs) > 0: - logger.warning( - f"arguments for dataset.select() available, but they do not contain 'indices' which is required, " - f"so we do not call select. provided arguments: \n{json.dumps(kwargs, indent=2)}" - ) - return self - - def rename_splits( - self, - mapping: Optional[Dict[str, str]] = None, - keep_other_splits: bool = True, - ) -> "DatasetDict": - """Renames the dataset splits. - - Args: - mapping: mapping from old split names to new split names. - keep_other_splits: if `True` (default), splits not contained in `mapping` are kept in the dataset - """ - - if mapping is None: - mapping = {} - result = type(self)( - { - mapping.get(name, name): data - for name, data in self.items() - if name in mapping or keep_other_splits - } - ) - return result - - def add_test_split( - self, - source_split: str = "train", - target_split: str = "test", - **kwargs, - ) -> "DatasetDict": - """Adds a test split to the dataset by splitting the source split. 
Uses the Huggingface - `train_test_split()` method.""" - - pie_split = self[source_split] - if not isinstance(pie_split, Dataset): - raise TypeError( - f"can only create a train-test-split from a Dataset, but the source split '{source_split}' is of type " - f"{type(pie_split)}" - ) - split_result_hf = pie_split.train_test_split(**kwargs) - split_result = type(self)( - { - name: Dataset.from_hf_dataset( - ds, - document_type=pie_split.document_type, - document_converters=pie_split.document_converters, - ) - for name, ds in split_result_hf.items() - } - ) - res = type(self)(self) - res[source_split] = split_result["train"] - res[target_split] = split_result["test"] - split_sizes = {k: len(v) for k, v in res.items()} - logger.info(f"dataset size after adding the split: {split_sizes}") - return res - - def drop_splits(self, split_names: List[str]) -> "DatasetDict": - """Drops splits from the dataset. - - Args: - split_names: names of the splits to drop - """ - - result = type(self)({name: ds for name, ds in self.items() if name not in split_names}) - return result - - def concat_splits(self, splits: List[str], target: str) -> "DatasetDict": - """Concatenates selected splits into a new split. - - Args: - splits: names of the splits to concatenate - target: name of the new split - """ - - if any(split not in self for split in splits): - raise ValueError( - f"not all splits to concatenate are present in the dataset: {splits}, {self.keys()}" - ) - if len(splits) == 0: - raise ValueError("please provide at least one split to concatenate") - result = type(self)({name: ds for name, ds in self.items() if name not in splits}) - if not issubclass(self.dataset_type, Dataset): - raise TypeError( - f"can only concatenate splits if the dataset type is a Dataset, but it is {self.dataset_type}" - ) - splits_to_concat: List[Dataset] = [self[name] for name in splits] # type: ignore - if any(self.dataset_type != type(ds) for ds in splits_to_concat): - raise ValueError( - f"not all splits to concatenate have the same dataset type: " - f"{({name: type(self[name]) for name in splits})}" - ) - document_converters = None - for ds in splits_to_concat: - if ds.document_converters is not None: - if document_converters is None: - document_converters = {} - document_converters.update(ds.document_converters) - # TODO: why do we need to ignore the typing here? - concatenated = datasets.concatenate_datasets(splits_to_concat) # type: ignore - if not issubclass(self.dataset_type, type(concatenated)): - raise ValueError( - f"concatenated dataset is not of the same type as the original dataset: " - f"{self.dataset_type}, {type(concatenated)}" - ) - result[target] = self.dataset_type.from_hf_dataset( - concatenated, document_type=self.document_type, document_converters=document_converters - ) - split_sizes = {k: len(v) for k, v in result.items()} - logger.info(f"dataset size after concatenating splits: {split_sizes}") - return result - - def filter( # type: ignore - self, - split: str, - function: Optional[Union[Callable[[Dict], bool], str]] = None, - result_split_name: Optional[str] = None, - **kwargs, - ) -> "DatasetDict": - """Filters a dataset split using a filter function. - - Note: In contrast to `map`, the filter function gets the example dict instead of a document as input - because the PIE variant of `Dataset.filter()` is not yet implemented and, thus, the Huggingface - variant is internally used instead. - - Args: - split: name of the split to filter - function: filter function that is called on each example dict. 
Can be provided as a callable or as a - string that is resolved to a callable using `resolve_target()`. - result_split_name: name of the split to store the filtered examples in. If `None`, the filtered examples - are stored in the same split as the original examples. - - """ - - if function is not None: - # create a shallow copy to not modify the input - result = type(self)(self) - function = resolve_target(function) - pie_split = result[split] - # TODO: Implement pytorch_ie.Dataset.filter() in a similar way such as map() to make use of the - # document type. For now, the filter function is called directly on the HF dataset and thus needs to - # accept a dict as input. - # we need to convert the dataset back to HF because the filter function internally uses map() which will - # break if the PIE variant is used - hf_split: Union[datasets.Dataset, datasets.IterableDataset] - if isinstance(pie_split, Dataset): - hf_split = datasets.Dataset(**Dataset.get_base_kwargs(pie_split)) - elif isinstance(pie_split, IterableDataset): - hf_split = datasets.IterableDataset(**IterableDataset.get_base_kwargs(pie_split)) - else: - raise ValueError(f"dataset split has unknown type: {type(pie_split)}") - hf_split_filtered = hf_split.filter(function=function, **kwargs) - target_split_name = result_split_name or split - target_split = type(pie_split).from_hf_dataset( - dataset=hf_split_filtered, # type: ignore - document_type=pie_split.document_type, - document_converters=pie_split.document_converters, - ) - # iterable datasets do not have a length - if not isinstance(target_split, IterableDataset): - logger.info( - f"filtered split [{target_split_name}] has {len(target_split)} entries" - ) - result[target_split_name] = target_split - return result - else: - return self - - def move_to_new_split( - self, - ids: Optional[List[str]] = None, - filter_function: Optional[Union[Callable[[Dict[str, Any]], bool], str]] = None, - source_split: str = "train", - target_split: str = "test", - ) -> "DatasetDict": - """Moves examples from one split to another split. ids or a filter function can be provided to select the - examples to move. - - Args: - ids: list of ids of the examples to move - filter_function: filter function that is called on each example dict. Can be provided as a callable or as a - string that can be resolved to such a callable. 
- source_split: name of the split to move the examples from - target_split: name of the split to move the examples to - """ - - if filter_function is not None: - filter_func = resolve_target(filter_function) - else: - if ids is None: - raise ValueError("please provide either a list of ids or a filter function") - - ids_set = set(ids) - - def filter_with_ids(ex: Dict[str, Any]): - # exclude from coverage because its usage happens in the map which is not collected - return ex["id"] in ids_set # pragma: no cover - - filter_func = filter_with_ids - - dataset_with_only_ids = self.filter( - split=source_split, - function=filter_func, - ) - dataset_without_ids = self.filter( - split=source_split, - function=lambda ex: not filter_func(ex), - ) - dataset_without_ids[target_split] = dataset_with_only_ids[source_split] - - split_sizes = {k: len(v) for k, v in dataset_without_ids.items()} - logger.info(f"dataset size after moving to new split: {split_sizes}") - return dataset_without_ids - - def cast_document_type( - self, new_document_type: Union[Type[Document], str], **kwargs - ) -> "DatasetDict": - """Casts the document type of all splits to a new document type.""" - - new_type = resolve_target(new_document_type) - - result = type(self)( - { - name: ds.cast_document_type(new_document_type=new_type, **kwargs) - for name, ds in self.items() - } - ) - return result diff --git a/src/pytorch_ie/data/dataset_formatter.py b/src/pytorch_ie/data/dataset_formatter.py deleted file mode 100644 index cdfac3a1..00000000 --- a/src/pytorch_ie/data/dataset_formatter.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import List - -import pyarrow as pa -from datasets.formatting.formatting import Formatter - -from pytorch_ie.core.document import Document - - -class DocumentFormatter(Formatter[Document, list, List[Document]]): - def __init__(self, document_type, features=None, **kwargs): - super().__init__(features=None) - self.document_type = document_type - - def format_row(self, pa_table: pa.Table) -> Document: - row = self.python_arrow_extractor().extract_row(pa_table) - return self.document_type.fromdict(row) - - def format_column(self, pa_table: pa.Table) -> list: - return [] - - def format_batch(self, pa_table: pa.Table) -> List[Document]: - batch = self.simple_arrow_extractor().extract_batch(pa_table).to_pylist() - return [self.document_type.fromdict(b) for b in batch] diff --git a/src/pytorch_ie/data/document_conversion.py b/src/pytorch_ie/data/document_conversion.py deleted file mode 100644 index 9c68b66b..00000000 --- a/src/pytorch_ie/data/document_conversion.py +++ /dev/null @@ -1,292 +0,0 @@ -import functools -import logging -from collections import defaultdict -from copy import copy, deepcopy -from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar, Union - -from transformers import PreTrainedTokenizer - -from pytorch_ie.annotations import Span -from pytorch_ie.core import Annotation -from pytorch_ie.documents import TextBasedDocument, TokenBasedDocument -from pytorch_ie.utils.hydra import resolve_target - -logger = logging.getLogger(__name__) - -ToD = TypeVar("ToD", bound=TokenBasedDocument) -TeD = TypeVar("TeD", bound=TextBasedDocument) - - -def text_based_document_to_token_based( - doc: TextBasedDocument, - result_document_type: Union[Type[ToD], str], - tokens: Optional[List[str]] = None, - token_offset_mapping: Optional[List[Tuple[int, int]]] = None, - char_to_token: Optional[Callable[[int], Optional[int]]] = None, - strict_span_conversion: bool = True, - verbose: bool = 
True, -) -> ToD: - document_type: Type[ToD] - if isinstance(result_document_type, str): - document_type = resolve_target(result_document_type) # type: ignore - else: - document_type = result_document_type - if not (isinstance(document_type, type) and issubclass(document_type, TokenBasedDocument)): - raise TypeError( - f"result_document_type must be a subclass of TokenBasedDocument or a string that resolves to that, " - f"but got {result_document_type}" - ) - if tokens is None: - tokens = doc.metadata.get("tokens") - if tokens is None: - raise ValueError( - "tokens must be provided to convert a text based document to token based, but got None" - ) - result = document_type(tokens=tuple(tokens), id=doc.id, metadata=deepcopy(doc.metadata)) - - # save text, token_offset_mapping and char_to_token (if available) in metadata - result.metadata["text"] = doc.text - token_offset_mapping_lists: Optional[List[List[int]]] - if token_offset_mapping is not None: - # convert offset tuples to lists because serialization and deserialization again - # will produce lists in any way (json does not know tuples) - token_offset_mapping_lists = [list(offsets) for offsets in token_offset_mapping] - if ( - "token_offset_mapping" in doc.metadata - and doc.metadata["token_offset_mapping"] != token_offset_mapping_lists - ): - logger.warning( - "token_offset_mapping in metadata is different from the new token_offset_mapping, " - "overwrite the metadata" - ) - result.metadata["token_offset_mapping"] = token_offset_mapping_lists - else: - token_offset_mapping_lists = doc.metadata.get("token_offset_mapping") - if token_offset_mapping_lists is not None: - token_offset_mapping = [tuple(offsets) for offsets in token_offset_mapping_lists] # type: ignore - if char_to_token is not None: - if "char_to_token" in doc.metadata and doc.metadata["char_to_token"] != char_to_token: - logger.warning( - "char_to_token in metadata is different from the new char_to_token, overwrite the metadata" - ) - result.metadata["char_to_token"] = char_to_token - else: - char_to_token = doc.metadata.get("char_to_token") - - # construct the char_to_token function, if not provided, from the token_offset_mapping - if char_to_token is None: - if token_offset_mapping is None: - raise ValueError( - "either token_offset_mapping or char_to_token must be provided to convert a text " - "based document to token based, but both are None" - ) - char_to_token_dict: Dict[int, int] = {} - for token_idx, (start, end) in enumerate(token_offset_mapping): - for char_idx in range(start, end): - char_to_token_dict[char_idx] = token_idx - - def char_to_token(char_idx: int) -> Optional[int]: - return char_to_token_dict.get(char_idx) - - text_targeting_layers = [ - annotation_field.name - for annotation_field in doc.annotation_fields() - if "text" in annotation_field.metadata["targets"] - ] - - override_annotations: Dict[str, Dict[int, Annotation]] = {} - removed_annotations: Dict[str, Set[int]] = defaultdict(set) - for text_targeting_layer_name in text_targeting_layers: - override_annotations[text_targeting_layer_name] = {} - char_span: Span - for char_span in doc[text_targeting_layer_name]: - if not isinstance(char_span, Span): - raise ValueError( - f"can not convert layers that target the text but contain non-span annotations, " - f"but found {type(char_span)} in layer {text_targeting_layer_name}" - ) - start_token_idx = char_to_token(char_span.start) - end_token_idx_inclusive = char_to_token(char_span.end - 1) - if start_token_idx is None or end_token_idx_inclusive is None: 
- if strict_span_conversion: - raise ValueError( - f'cannot find token span for character span: "{char_span}", text="{doc.text}", ' - f"token_offset_mapping={token_offset_mapping}" - ) - else: - if verbose: - logger.warning( - f'cannot find token span for character span "{char_span}", skip it (disable this ' - f"warning with verbose=False)" - ) - removed_annotations[text_targeting_layer_name].add(char_span._id) - else: - token_span = char_span.copy(start=start_token_idx, end=end_token_idx_inclusive + 1) - override_annotations[text_targeting_layer_name][char_span._id] = token_span - valid_spans = set(override_annotations[text_targeting_layer_name].values()) - result[text_targeting_layer_name].extend(sorted(valid_spans, key=lambda span: span.start)) - - result.add_all_annotations_from_other( - doc, - override_annotations=override_annotations, - removed_annotations=removed_annotations, - strict=strict_span_conversion, - verbose=verbose, - ) - - return result - - -def token_based_document_to_text_based( - doc: TokenBasedDocument, - result_document_type: Union[Type[TeD], str], - text: Optional[str] = None, - token_offset_mapping: Optional[List[Tuple[int, int]]] = None, - join_tokens_with: Optional[str] = None, - strict_span_conversion: bool = True, - verbose: bool = True, -) -> TeD: - document_type: Type[TeD] - if isinstance(result_document_type, str): - document_type = resolve_target(result_document_type) # type: ignore - else: - document_type = result_document_type - if not (isinstance(document_type, type) and issubclass(document_type, TextBasedDocument)): - raise TypeError( - f"result_document_type must be a subclass of TextBasedDocument or a string that resolves to that, " - f"but got {result_document_type}" - ) - # if a token_separator is provided, we construct the text from the tokens - if join_tokens_with is not None: - start = 0 - token_offset_mapping = [] - tokens = doc.tokens - for token in tokens: - end = start + len(token) - token_offset_mapping.append((start, end)) - # we add the separator after each token - start = end + len(join_tokens_with) - text = join_tokens_with.join(tokens) - else: - text = doc.metadata.get("text") if text is None else text - if text is None: - raise ValueError( - "if join_tokens_with is None, text must be provided, but got None as well" - ) - token_offset_mapping_lists = ( - doc.metadata.get("token_offset_mapping") - if token_offset_mapping is None - else token_offset_mapping - ) - if token_offset_mapping_lists is None: - raise ValueError( - "if join_tokens_with is None, token_offsets must be provided, but got None as well" - ) - else: - token_offset_mapping = [tuple(offsets) for offsets in token_offset_mapping_lists] # type: ignore - - result = document_type(text=text, id=doc.id, metadata=deepcopy(doc.metadata)) - if "tokens" in doc.metadata and doc.metadata["tokens"] != list(doc.tokens): - logger.warning("tokens in metadata are different from new tokens, overwrite the metadata") - result.metadata["tokens"] = list(doc.tokens) - # convert offset tuples to lists because serialization and deserialization again - # will produce lists in any way (json does not know tuples) - token_offset_mapping_lists = [list(offsets) for offsets in token_offset_mapping] - if ( - "token_offset_mapping" in doc.metadata - and doc.metadata["token_offset_mapping"] != token_offset_mapping_lists - ): - logger.warning( - "token_offset_mapping in metadata is different from the new token_offset_mapping, " - "overwrite the metadata" - ) - result.metadata["token_offset_mapping"] = 
token_offset_mapping_lists - - token_targeting_layers = [ - annotation_field.name - for annotation_field in doc.annotation_fields() - if "tokens" in annotation_field.metadata["targets"] - ] - - override_annotations: Dict[str, Dict[int, Annotation]] = {} - removed_annotations: Dict[str, Set[int]] = defaultdict(set) - for token_targeting_layer_name in token_targeting_layers: - override_annotations[token_targeting_layer_name] = {} - for token_span in doc[token_targeting_layer_name]: - if not isinstance(token_span, Span): - raise ValueError( - f"can not convert layers that target the tokens but contain non-span annotations, " - f"but found {type(token_span)} in layer {token_targeting_layer_name}" - ) - start_char_idx = token_offset_mapping[token_span.start][0] - end_char_idx = token_offset_mapping[token_span.end - 1][1] - - char_span = token_span.copy(start=start_char_idx, end=end_char_idx) - override_annotations[token_targeting_layer_name][token_span._id] = char_span - valid_spans = set(override_annotations[token_targeting_layer_name].values()) - result[token_targeting_layer_name].extend(sorted(valid_spans, key=lambda span: span.start)) - - result.add_all_annotations_from_other( - doc, - override_annotations=override_annotations, - removed_annotations=removed_annotations, - strict=strict_span_conversion, - verbose=verbose, - ) - - return result - - -def tokenize_document( - doc: TextBasedDocument, - tokenizer: PreTrainedTokenizer, - result_document_type: Type[ToD], - partition_layer: Optional[str] = None, - strict_span_conversion: bool = True, - verbose: bool = True, - **tokenize_kwargs, -) -> List[ToD]: - result = [] - partitions: Iterable[Span] - if partition_layer is None: - partitions = [Span(start=0, end=len(doc.text))] - else: - partitions = doc[partition_layer] - for partition in partitions: - text = doc.text[partition.start : partition.end] - current_tokenize_kwargs = copy(tokenize_kwargs) - if "text" in tokenize_kwargs: - current_tokenize_kwargs["text_pair"] = text - sequence_index = 1 - else: - current_tokenize_kwargs["text"] = text - sequence_index = 0 - tokenized_text = tokenizer(**current_tokenize_kwargs) - for batch_encoding in tokenized_text.encodings: - token_offset_mapping = batch_encoding.offsets - char_to_token: Optional[Callable[[int], Optional[int]]] - char_to_token = functools.partial( - batch_encoding.char_to_token, sequence_index=sequence_index - ) - token_offset_mapping = [ - offsets if s_id == sequence_index else (0, 0) - for s_id, offsets in zip(batch_encoding.sequence_ids, token_offset_mapping) - ] - if partition.start > 0: - token_offset_mapping = [ - (start + partition.start, end + partition.start) - for start, end in token_offset_mapping - ] - char_to_token = None - tokenized_document = text_based_document_to_token_based( - doc, - tokens=batch_encoding.tokens, - result_document_type=result_document_type, - token_offset_mapping=token_offset_mapping, - char_to_token=char_to_token, - strict_span_conversion=strict_span_conversion, - verbose=verbose, - ) - tokenized_document.metadata["tokenizer_encoding"] = batch_encoding - result.append(tokenized_document) - return result diff --git a/src/pytorch_ie/metrics/statistics.py b/src/pytorch_ie/metrics/statistics.py deleted file mode 100644 index a1cff8a5..00000000 --- a/src/pytorch_ie/metrics/statistics.py +++ /dev/null @@ -1,244 +0,0 @@ -import logging -from collections import defaultdict -from typing import Any, Dict, List, Optional, Type, Union - -from transformers import AutoTokenizer, PreTrainedTokenizer - -from 
pytorch_ie import tokenize_document -from pytorch_ie.annotations import Span -from pytorch_ie.core import Document, DocumentStatistic -from pytorch_ie.documents import TextBasedDocument, TokenBasedDocument -from pytorch_ie.utils.hydra import resolve_optional_document_type - -logger = logging.getLogger(__name__) - - -class TokenCountCollector(DocumentStatistic): - """Collects the token count of a field when tokenizing its content with a Huggingface tokenizer. - - The content of the field should be a string. - """ - - def __init__( - self, - tokenizer: Union[str, PreTrainedTokenizer], - text_field: str = "text", - tokenizer_kwargs: Optional[Dict[str, Any]] = None, - document_type: Optional[Type[Document]] = None, - **kwargs, - ): - if document_type is None and text_field == "text": - document_type = TextBasedDocument - super().__init__(document_type=document_type, **kwargs) - self.tokenizer = ( - AutoTokenizer.from_pretrained(tokenizer) if isinstance(tokenizer, str) else tokenizer - ) - self.tokenizer_kwargs = tokenizer_kwargs or {} - self.text_field = text_field - - def _collect(self, doc: Document) -> int: - text = getattr(doc, self.text_field) - encodings = self.tokenizer(text, **self.tokenizer_kwargs) - tokens = encodings.tokens() - return len(tokens) - - -class FieldLengthCollector(DocumentStatistic): - """Collects the length of a field, e.g. to collect the number the characters in the input text. - - The field should be a list of sized elements. - """ - - def __init__(self, field: str, **kwargs): - super().__init__(**kwargs) - self.field = field - - def _collect(self, doc: Document) -> int: - field_obj = getattr(doc, self.field) - return len(field_obj) - - -class SubFieldLengthCollector(DocumentStatistic): - """Collects the length of a subfield in a field, e.g. to collect the number of arguments of N-ary relations.""" - - def __init__(self, field: str, subfield: str, **kwargs): - super().__init__(**kwargs) - self.field = field - self.subfield = subfield - - def _collect(self, doc: Document) -> List[int]: - field_obj = getattr(doc, self.field) - lengths = [] - for entry in field_obj: - subfield_obj = getattr(entry, self.subfield) - lengths.append(len(subfield_obj)) - return lengths - - -class SpanLengthCollector(DocumentStatistic): - """Collects the lengths of Span annotations. If labels are provided, the lengths collected per - label. - - If a tokenizer is provided, the span length is calculated in means of tokens, otherwise in - means of characters. - """ - - DEFAULT_AGGREGATION_FUNCTIONS = ["len", "mean", "std", "min", "max"] - - def __init__( - self, - layer: str, - tokenize: bool = False, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - tokenized_document_type: Optional[Union[str, Type[TokenBasedDocument]]] = None, - labels: Optional[Union[List[str], str]] = None, - label_attribute: str = "label", - tokenize_kwargs: Optional[Dict[str, Any]] = None, - **kwargs, - ): - super().__init__(**kwargs) - self.layer = layer - if isinstance(labels, str) and labels != "INFERRED": - raise ValueError("labels must be a list of strings or 'INFERRED'") - if labels == "INFERRED": - logger.warning( - f"Inferring labels with {self.__class__.__name__} from data produces wrong results " - f"for certain aggregation functions (e.g. 'mean', 'std', 'min') because zero values " - f"are not included in the calculation. We remove these aggregation functions from " - f"this collector, but be aware that the results may be wrong for your own aggregation " - f"functions that rely on zero values." 
- ) - self.aggregation_functions = { - name: func - for name, func in self.aggregation_functions.items() - if name not in ["mean", "std", "min"] - } - self.labels = labels - self.label_field = label_attribute - self.tokenize = tokenize - if self.tokenize: - if tokenizer is None: - raise ValueError( - "tokenizer must be provided to calculate the span length in means of tokens" - ) - if isinstance(tokenizer, str): - tokenizer = AutoTokenizer.from_pretrained(tokenizer) - self.tokenizer = tokenizer - resolved_tokenized_document_type = resolve_optional_document_type( - tokenized_document_type - ) - if resolved_tokenized_document_type is None: - raise ValueError( - "tokenized_document_type must be provided to calculate the span length in means of tokens" - ) - if not ( - isinstance(resolved_tokenized_document_type, type) - and issubclass(resolved_tokenized_document_type, TokenBasedDocument) - ): - raise TypeError( - f"tokenized_document_type must be a subclass of TokenBasedDocument, but it is: " - f"{resolved_tokenized_document_type}" - ) - self.tokenized_document_type = resolved_tokenized_document_type - self.tokenize_kwargs = tokenize_kwargs or {} - - def _collect(self, doc: Document) -> Union[List[int], Dict[str, List[int]]]: - docs: Union[List[Document], List[TokenBasedDocument]] - if self.tokenize: - if not isinstance(doc, TextBasedDocument): - raise ValueError( - "doc must be a TextBasedDocument to calculate the span length in means of tokens" - ) - if not isinstance(doc, TextBasedDocument): - raise ValueError( - "doc must be a TextBasedDocument to calculate the span length in means of tokens" - ) - docs = tokenize_document( - doc, - tokenizer=self.tokenizer, - result_document_type=self.tokenized_document_type, - **self.tokenize_kwargs, - ) - else: - docs = [doc] - - values: Dict[str, List[int]] - if isinstance(self.labels, str): - values = defaultdict(list) - else: - values = {label: [] for label in self.labels or ["ALL"]} - for doc in docs: - layer_obj = getattr(doc, self.layer) - for span in layer_obj: - if not isinstance(span, Span): - raise TypeError( - f"span length calculation is not yet supported for {type(span)}" - ) - length = span.end - span.start - if self.labels is None: - label = "ALL" - else: - label = getattr(span, self.label_field) - values[label].append(length) - - return values if self.labels is not None else values["ALL"] - - -class DummyCollector(DocumentStatistic): - """A dummy collector that always returns 1, e.g. to count the number of documents. - - Can be used to count the number of documents. - """ - - DEFAULT_AGGREGATION_FUNCTIONS = ["sum"] - - def _collect(self, doc: Document) -> int: - return 1 - - -class LabelCountCollector(DocumentStatistic): - """Collects the number of field entries per label, e.g. to collect the number of entities per type. - - The field should be a list of elements with a label attribute. 
- - Important: To make correct use of the result data, missing values need to be filled with 0, e.g.: - {("ORG",): [2, 3], ("LOC",): [2]} -> {("ORG",): [2, 3], ("LOC",): [2, 0]} - """ - - DEFAULT_AGGREGATION_FUNCTIONS = ["mean", "std", "min", "max", "len", "sum"] - - def __init__( - self, field: str, labels: Union[List[str], str], label_attribute: str = "label", **kwargs - ): - super().__init__(**kwargs) - self.field = field - self.label_attribute = label_attribute - if not (isinstance(labels, list) or labels == "INFERRED"): - raise ValueError("labels must be a list of strings or 'INFERRED'") - if labels == "INFERRED": - logger.warning( - f"Inferring labels with {self.__class__.__name__} from data produces wrong results " - f"for certain aggregation functions (e.g. 'mean', 'std', 'min') because zero values " - f"are not included in the calculation. We remove these aggregation functions from " - f"this collector, but be aware that the results may be wrong for your own aggregation " - f"functions that rely on zero values." - ) - self.aggregation_functions = { - name: func - for name, func in self.aggregation_functions.items() - if name not in ["mean", "std", "min"] - } - - self.labels = labels - - def _collect(self, doc: Document) -> Dict[str, int]: - field_obj = getattr(doc, self.field) - counts: Dict[str, int] - if self.labels == "INFERRED": - counts = defaultdict(int) - else: - counts = {label: 0 for label in self.labels} - for elem in field_obj: - label = getattr(elem, self.label_attribute) - counts[label] += 1 - return dict(counts) diff --git a/tests/__init__.py b/tests/__init__.py index 4547b94e..fc4c25bc 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,18 +1,4 @@ from pathlib import Path -from datasets import DownloadMode, load_dataset - TESTS_ROOT = Path(__file__).parent FIXTURES_ROOT = TESTS_ROOT / "fixtures" -DATASET_BUILDERS_ROOT = Path("dataset_builders") - - -def _check_hf_conll2003_is_available(): - try: - load_dataset("conll2003", download_mode=DownloadMode.FORCE_REDOWNLOAD) - return True - except ConnectionError: - return False - - -_HF_CONLL2003_IS_AVAILABLE = _check_hf_conll2003_is_available() diff --git a/tests/conftest.py b/tests/conftest.py index a3edd111..de86b55e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,16 +1,13 @@ import dataclasses +import json -import datasets import pytest from pytorch_ie.annotations import BinaryRelation, LabeledSpan, Span from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.data import Dataset, IterableDataset from pytorch_ie.documents import TextDocument from tests import FIXTURES_ROOT -datasets.disable_caching() - @dataclasses.dataclass class TestDocument(TextDocument): @@ -19,41 +16,6 @@ class TestDocument(TextDocument): relations: AnnotationList[BinaryRelation] = annotation_field(target="entities") -@pytest.fixture -def json_dataset(): - dataset_dir = FIXTURES_ROOT / "datasets" / "json" - - dataset = datasets.load_dataset( - path="json", - field="data", - data_files={ - "train": str(dataset_dir / "train.json"), - "validation": str(dataset_dir / "val.json"), - "test": str(dataset_dir / "test.json"), - }, - ) - - return dataset - - -@pytest.fixture -def iterable_json_dataset(): - dataset_dir = FIXTURES_ROOT / "datasets" / "json" - - dataset = datasets.load_dataset( - path="json", - field="data", - data_files={ - "train": str(dataset_dir / "train.json"), - "validation": str(dataset_dir / "val.json"), - "test": str(dataset_dir / "test.json"), - }, - streaming=True, - ) - - return dataset - 
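For reference, a minimal sketch of how the statistics collectors removed above were used, mirroring the removed tests/core/test_statistic.py further below. The ExampleDocument class and the data_dir value are illustrative assumptions; the imports and call signatures are the ones exercised by those tests.

import dataclasses

from pytorch_ie import DatasetDict
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextBasedDocument
from pytorch_ie.metrics.statistics import DummyCollector, LabelCountCollector


@dataclasses.dataclass
class ExampleDocument(TextBasedDocument):
    # illustrative document type with a single labeled-span layer targeting the text
    entities: AnnotationList[LabeledSpan] = annotation_field(target="text")


# placeholder path; the removed tests point this at a JSON fixture directory
dataset = DatasetDict.from_json(
    data_dir="path/to/dataset_dict", document_type=ExampleDocument
)

# a DocumentStatistic instance is called directly on the DatasetDict and returns
# aggregated values per split, e.g. {"train": {"sum": 3}, ...} for DummyCollector
doc_counts = DummyCollector()(dataset)
entity_counts = LabelCountCollector(field="entities", labels=["PER", "ORG"])(dataset)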
- def example_to_doc_dict(example): doc = TestDocument(text=example["text"], id=example["id"]) @@ -81,48 +43,20 @@ def example_to_doc_dict(example): @pytest.fixture -def dataset(json_dataset): - mapped_dataset = json_dataset.map(example_to_doc_dict) - - dataset = datasets.DatasetDict( - { - k: Dataset.from_hf_dataset(dataset, document_type=TestDocument) - for k, dataset in mapped_dataset.items() - } - ) - - assert len(dataset) == 3 - assert set(dataset.keys()) == {"train", "validation", "test"} - - assert len(dataset["train"]) == 8 - assert len(dataset["validation"]) == 2 - assert len(dataset["test"]) == 2 - - return dataset - - -@pytest.fixture -def documents(dataset): - return list(dataset["train"]) +def document_dataset(): + result = {} + for path in (FIXTURES_ROOT / "datasets" / "json").iterdir(): + loaded_data = json.load(open(path))["data"] + docs = [TestDocument.fromdict(example_to_doc_dict(ex)) for ex in loaded_data] + result[path.stem] = docs + return result @pytest.fixture -def iterable_dataset(iterable_json_dataset): - dataset = datasets.IterableDatasetDict( - { - k: IterableDataset.from_hf_dataset( - dataset.map(example_to_doc_dict), document_type=TestDocument - ) - for k, dataset in iterable_json_dataset.items() - } - ) - - assert len(dataset) == 3 - assert set(dataset.keys()) == {"train", "validation", "test"} - - return dataset +def documents(document_dataset): + return document_dataset["train"] -@pytest.fixture(params=["dataset", "iterable_dataset"]) -def maybe_iterable_dataset(request): - return request.getfixturevalue(request.param) +def test_documents(documents): + assert len(documents) == 8 + assert all(isinstance(doc, TestDocument) for doc in documents) diff --git a/tests/core/test_statistic.py b/tests/core/test_statistic.py deleted file mode 100644 index c8dd2db8..00000000 --- a/tests/core/test_statistic.py +++ /dev/null @@ -1,225 +0,0 @@ -import dataclasses - -import pytest - -from pytorch_ie import DatasetDict -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextBasedDocument, TokenBasedDocument -from pytorch_ie.metrics.statistics import ( - DummyCollector, - FieldLengthCollector, - LabelCountCollector, - SpanLengthCollector, - SubFieldLengthCollector, - TokenCountCollector, -) -from tests import FIXTURES_ROOT - - -@pytest.fixture -def dataset(): - @dataclasses.dataclass - class Conll2003Document(TextBasedDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - return DatasetDict.from_json( - data_dir=FIXTURES_ROOT / "dataset_dict" / "conll2003_extract", - document_type=Conll2003Document, - ) - - -def test_statistics(dataset): - statistic = DummyCollector() - values = statistic(dataset) - assert values == {"train": {"sum": 3}, "test": {"sum": 3}, "validation": {"sum": 3}} - - statistic = LabelCountCollector(field="entities", labels=["LOC", "PER", "ORG", "MISC"]) - values = statistic(dataset) - assert values == { - "train": { - "LOC": { - "mean": 0.3333333333333333, - "std": 0.4714045207910317, - "min": 0, - "max": 1, - "len": 3, - "sum": 1, - }, - "PER": { - "mean": 0.3333333333333333, - "std": 0.4714045207910317, - "min": 0, - "max": 1, - "len": 3, - "sum": 1, - }, - "ORG": { - "mean": 0.3333333333333333, - "std": 0.4714045207910317, - "min": 0, - "max": 1, - "len": 3, - "sum": 1, - }, - "MISC": { - "mean": 0.6666666666666666, - "std": 0.9428090415820634, - "min": 0, - "max": 2, - "len": 3, - "sum": 2, - }, - }, - "validation": { - "LOC": { 
- "mean": 0.3333333333333333, - "std": 0.4714045207910317, - "min": 0, - "max": 1, - "len": 3, - "sum": 1, - }, - "PER": { - "mean": 0.3333333333333333, - "std": 0.4714045207910317, - "min": 0, - "max": 1, - "len": 3, - "sum": 1, - }, - "ORG": {"mean": 1.0, "std": 0.816496580927726, "min": 0, "max": 2, "len": 3, "sum": 3}, - "MISC": { - "mean": 0.3333333333333333, - "std": 0.4714045207910317, - "min": 0, - "max": 1, - "len": 3, - "sum": 1, - }, - }, - "test": { - "LOC": {"mean": 1.0, "std": 0.816496580927726, "min": 0, "max": 2, "len": 3, "sum": 3}, - "PER": { - "mean": 0.6666666666666666, - "std": 0.4714045207910317, - "min": 0, - "max": 1, - "len": 3, - "sum": 2, - }, - "ORG": {"mean": 0.0, "std": 0.0, "min": 0, "max": 0, "len": 3, "sum": 0}, - "MISC": {"mean": 0.0, "std": 0.0, "min": 0, "max": 0, "len": 3, "sum": 0}, - }, - } - - statistic = LabelCountCollector(field="entities", labels="INFERRED") - values = statistic(dataset) - assert values == { - "train": { - "ORG": {"max": 1, "len": 1, "sum": 1}, - "MISC": {"max": 2, "len": 1, "sum": 2}, - "PER": {"max": 1, "len": 1, "sum": 1}, - "LOC": {"max": 1, "len": 1, "sum": 1}, - }, - "validation": { - "ORG": {"max": 2, "len": 2, "sum": 3}, - "LOC": {"max": 1, "len": 1, "sum": 1}, - "MISC": {"max": 1, "len": 1, "sum": 1}, - "PER": {"max": 1, "len": 1, "sum": 1}, - }, - "test": {"LOC": {"max": 2, "len": 2, "sum": 3}, "PER": {"max": 1, "len": 2, "sum": 2}}, - } - - statistic = FieldLengthCollector(field="text") - values = statistic(dataset) - assert values == { - "test": {"max": 57, "mean": 36.0, "min": 11, "std": 18.991226044325487}, - "train": {"max": 48, "mean": 27.333333333333332, "min": 15, "std": 14.70449666674185}, - "validation": {"max": 187, "mean": 89.66666666666667, "min": 17, "std": 71.5603863103665}, - } - - statistic = SpanLengthCollector(layer="entities") - values = statistic(dataset) - assert values == { - "train": {"len": 5, "mean": 7.6, "std": 4.223742416388575, "min": 2, "max": 15}, - "validation": { - "len": 6, - "mean": 10.833333333333334, - "std": 2.9674156357941426, - "min": 6, - "max": 14, - }, - "test": {"len": 5, "mean": 9.4, "std": 5.748043145279966, "min": 5, "max": 20}, - } - - statistic = SpanLengthCollector(layer="entities", labels="INFERRED") - values = statistic(dataset) - assert values == { - "train": { - "ORG": {"max": 2, "len": 1}, - "MISC": {"max": 7, "len": 2}, - "PER": {"max": 15, "len": 1}, - "LOC": {"max": 8, "len": 1}, - }, - "test": { - "LOC": { - "max": 20, - "len": 3, - }, - "PER": {"max": 11, "len": 2}, - }, - "validation": { - "ORG": {"max": 14, "len": 3}, - "LOC": {"max": 6, "len": 1}, - "MISC": {"max": 11, "len": 1}, - "PER": {"max": 12, "len": 1}, - }, - } - - # this is not super useful, we just collect teh lengths of the labels, but it is enough to test the code - statistic = SubFieldLengthCollector(field="entities", subfield="label") - values = statistic(dataset) - assert values == { - "test": {"max": 3, "mean": 3.0, "min": 3, "std": 0.0}, - "train": {"max": 4, "mean": 3.4, "min": 3, "std": 0.4898979485566356}, - "validation": {"max": 4, "mean": 3.1666666666666665, "min": 3, "std": 0.3726779962499649}, - } - - -@pytest.mark.slow -def test_statistics_with_tokenize(dataset): - statistic = TokenCountCollector( - text_field="text", - tokenizer="bert-base-uncased", - tokenizer_kwargs=dict(add_special_tokens=False), - ) - values = statistic(dataset) - assert values == { - "test": {"max": 12, "mean": 9.333333333333334, "min": 4, "std": 3.7712361663282534}, - "train": {"max": 9, "mean": 
5.666666666666667, "min": 2, "std": 2.8674417556808756}, - "validation": {"max": 38, "mean": 18.333333333333332, "min": 6, "std": 14.055445761538678}, - } - - @dataclasses.dataclass - class TokenDocumentWithLabeledEntities(TokenBasedDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="tokens") - - statistic = SpanLengthCollector( - layer="entities", - tokenize=True, - tokenizer="bert-base-uncased", - tokenized_document_type=TokenDocumentWithLabeledEntities, - ) - values = statistic(dataset) - assert values == { - "test": {"len": 5, "max": 4, "mean": 2.4, "min": 1, "std": 1.2000000000000002}, - "train": {"len": 5, "max": 2, "mean": 1.2, "min": 1, "std": 0.4}, - "validation": { - "len": 6, - "max": 2, - "mean": 1.3333333333333333, - "min": 1, - "std": 0.4714045207910317, - }, - } diff --git a/tests/data/dataset_tester.py b/tests/data/dataset_tester.py deleted file mode 100644 index b4667c82..00000000 --- a/tests/data/dataset_tester.py +++ /dev/null @@ -1,131 +0,0 @@ -import os -import tempfile -from typing import List, Optional -from unittest import TestCase - -from datasets.builder import BuilderConfig -from datasets.download.download_manager import DownloadMode -from datasets.download.mock_download_manager import MockDownloadManager -from datasets.load import dataset_module_factory, import_main_class -from datasets.utils.file_utils import DownloadConfig, is_remote_url -from datasets.utils.logging import get_logger - -from pytorch_ie.data.builder import ArrowBasedBuilder, GeneratorBasedBuilder -from tests import DATASET_BUILDERS_ROOT - -logger = get_logger(__name__) - - -# Taken from https://github.com/huggingface/datasets/blob/207be676bffe9d164740a41a883af6125edef135/tests/test_dataset_common.py#L101 -class DatasetTester: - def __init__(self, parent): - self.parent = parent if parent is not None else TestCase() - - def load_builder_class(self, dataset_name, is_local=False): - # Download/copy dataset script - if is_local is True: - dataset_module = dataset_module_factory( - os.path.join(DATASET_BUILDERS_ROOT, dataset_name) - ) - else: - dataset_module = dataset_module_factory( - dataset_name, download_config=DownloadConfig(force_download=True) - ) - # Get dataset builder class - builder_cls = import_main_class(dataset_module.module_path) - return builder_cls - - def load_all_configs(self, dataset_name, is_local=False) -> List[Optional[BuilderConfig]]: - # get builder class - builder_cls = self.load_builder_class(dataset_name, is_local=is_local) - builder = builder_cls - - if len(builder.BUILDER_CONFIGS) == 0: - return [None] - return builder.BUILDER_CONFIGS - - def check_load_dataset( - self, dataset_name, configs, is_local=False, use_local_dummy_data=False - ): - for config in configs: - with tempfile.TemporaryDirectory() as processed_temp_dir, tempfile.TemporaryDirectory() as raw_temp_dir: - # create config and dataset - dataset_builder_cls = self.load_builder_class(dataset_name, is_local=is_local) - name = config.name if config is not None else None - dataset_builder = dataset_builder_cls( - config_name=name, cache_dir=processed_temp_dir - ) - - # TODO: skip Beam datasets and datasets that lack dummy data for now - if not isinstance(dataset_builder, (ArrowBasedBuilder, GeneratorBasedBuilder)): - logger.info("Skip tests for this dataset for now") - return - - if config is not None: - version = config.version - else: - version = dataset_builder.VERSION - - def check_if_url_is_valid(url): - if is_remote_url(url) and "\\" in url: - raise ValueError(f"Bad remote url 
'{url} since it contains a backslash") - - # create mock data loader manager that has a special download_and_extract() method to download dummy data instead of real data - mock_dl_manager = MockDownloadManager( - dataset_name=dataset_name, - config=config, - version=version, - cache_dir=raw_temp_dir, - use_local_dummy_data=use_local_dummy_data, - download_callbacks=[check_if_url_is_valid], - ) - mock_dl_manager.datasets_scripts_dir = str(DATASET_BUILDERS_ROOT) - - # packaged datasets like csv, text, json or pandas require some data files - # builder_name = dataset_builder.__class__.__name__.lower() - # if builder_name in _PACKAGED_DATASETS_MODULES: - # mock_dl_manager.download_dummy_data() - # path_to_dummy_data = mock_dl_manager.dummy_file - # dataset_builder.config.data_files = get_packaged_dataset_dummy_data_files( - # builder_name, path_to_dummy_data - # ) - # for config_attr, value in get_packaged_dataset_config_attributes(builder_name).items(): - # setattr(dataset_builder.config, config_attr, value) - - # mock size needed for dummy data instead of actual dataset - if dataset_builder.info is not None: - # approximate upper bound of order of magnitude of dummy data files - one_mega_byte = 2 << 19 - dataset_builder.info.size_in_bytes = 2 * one_mega_byte - dataset_builder.info.download_size = one_mega_byte - dataset_builder.info.dataset_size = one_mega_byte - - # generate examples from dummy data - dataset_builder.download_and_prepare( - dl_manager=mock_dl_manager, - download_mode=DownloadMode.FORCE_REDOWNLOAD, - verification_mode="no_checks", - try_from_hf_gcs=False, - ) - - # get dataset - dataset = dataset_builder.as_dataset(verification_mode="no_checks") - - # check that dataset is not empty - self.parent.assertListEqual( - sorted(dataset_builder.info.splits.keys()), sorted(dataset) - ) - for split in dataset_builder.info.splits.keys(): - # check that loaded datset is not empty - self.parent.assertTrue(len(dataset[split]) > 0) - - # check that we can cast features for each task template - task_templates = dataset_builder.info.task_templates - if task_templates: - for task in task_templates: - task_features = {**task.input_schema, **task.label_schema} - for split in dataset: - casted_dataset = dataset[split].prepare_for_task(task) - self.parent.assertDictEqual(task_features, casted_dataset.features) - del casted_dataset - del dataset diff --git a/tests/data/test_builder.py b/tests/data/test_builder.py deleted file mode 100644 index 5112a8e1..00000000 --- a/tests/data/test_builder.py +++ /dev/null @@ -1,224 +0,0 @@ -import re -import tempfile -from dataclasses import dataclass -from typing import Type - -import pytest -from datasets import DatasetBuilder, Version -from datasets.load import dataset_module_factory, import_main_class - -from pytorch_ie.annotations import LabeledSpan, Span -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.data.builder import PieDatasetBuilder -from pytorch_ie.documents import TextBasedDocument, TextDocumentWithSpans -from tests import FIXTURES_ROOT - -DATASETS_ROOT = FIXTURES_ROOT / "builder" / "datasets" - - -def test_builder_class(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "single_config")) - builder_cls = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - builder = builder_cls(cache_dir=tmp_cache_dir) - assert isinstance(builder, DatasetBuilder) - - -def test_builder_class_with_kwargs(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / 
"single_config")) - builder_cls = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - builder = builder_cls(cache_dir=tmp_cache_dir, parameter="test") - assert isinstance(builder, DatasetBuilder) - assert builder.config.parameter == "test" - - -def test_builder_class_with_kwargs_wrong_parameter(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "single_config")) - builder_cls = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - # this should raise an exception because the base config does not know the parameter - with pytest.raises( - TypeError, - match=re.escape("__init__() got an unexpected keyword argument 'unknown_parameter'"), - ): - builder = builder_cls( - cache_dir=tmp_cache_dir, parameter="test", unknown_parameter="test_unknown" - ) - - -def test_builder_class_with_base_dataset_kwargs(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "single_config")) - builder_cls = import_main_class(dataset_module.module_path) - base_dataset_kwargs = dict(version=Version("0.0.0"), description="new description") - with tempfile.TemporaryDirectory() as tmp_cache_dir: - builder = builder_cls(cache_dir=tmp_cache_dir, base_dataset_kwargs=base_dataset_kwargs) - assert isinstance(builder, DatasetBuilder) - assert builder.base_builder.config.version == "0.0.0" - assert builder.base_builder.config.description == "new description" - - -def test_builder_class_with_base_dataset_kwargs_wrong_parameter(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "single_config")) - builder_cls = import_main_class(dataset_module.module_path) - base_dataset_kwargs = dict(unknown_base_parameter="base_parameter_value") - with tempfile.TemporaryDirectory() as tmp_cache_dir: - # this should raise an exception because the base config does not know the parameter - with pytest.raises( - TypeError, - match=re.escape( - "__init__() got an unexpected keyword argument 'unknown_base_parameter'" - ), - ): - builder = builder_cls(cache_dir=tmp_cache_dir, base_dataset_kwargs=base_dataset_kwargs) - - -def test_builder_class_multi_configs(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "multi_config")) - builder_cls = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - with pytest.raises(ValueError, match="Config name is missing."): - builder = builder_cls(cache_dir=tmp_cache_dir) - - builder = builder_cls(config_name="es", cache_dir=tmp_cache_dir) - assert isinstance(builder, DatasetBuilder) - - -def test_builder_class_name_mapping(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "name_mapping")) - builder_cls = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - builder = builder_cls(config_name="es", cache_dir=tmp_cache_dir) - assert builder.info.config_name == "es" - assert builder.base_builder.info.config_name == "nl" - - builder = builder_cls(config_name="nl", cache_dir=tmp_cache_dir) - assert builder.info.config_name == "nl" - assert builder.base_builder.info.config_name == "nl" - - -def test_builder_class_name_mapping_disabled(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "name_mapping_disabled")) - builder_cls = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - # this should raise an exception because the config name is not passed - with pytest.raises(ValueError, match="Config name 
is missing."): - builder = builder_cls(config_name="es", cache_dir=tmp_cache_dir) - - # here we set the base config name via base_dataset_kwargs - builder = builder_cls( - config_name="es", cache_dir=tmp_cache_dir, base_dataset_kwargs=dict(name="nl") - ) - assert builder.info.config_name == "es" - assert builder.base_builder.info.config_name == "nl" - - -def test_builder_class_name_mapping_and_defaults(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "default_config_kwargs")) - builder_cls = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - # this comes from passing the config as base config name - builder = builder_cls(config_name="es", cache_dir=tmp_cache_dir) - assert builder.info.config_name == "es" - assert builder.base_builder.info.config_name == "es" - - # this gets created by the default setting from BASE_CONFIG_KWARGS_DICT - builder = builder_cls(config_name="nl", cache_dir=tmp_cache_dir) - assert builder.info.config_name == "nl" - assert builder.base_builder.info.config_name == "default" - assert builder.base_builder.info.version == "0.0.0" - - -def test_wrong_builder_class_config(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "wrong_builder_class_config")) - builder_cls = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - # This should raise an exception because the base builder is derived from GeneratorBasedBuilder, - # but the PIE dataset builder is derived from ArrowBasedBuilder. - with pytest.raises( - TypeError, - match=re.escape( - "The PyTorch-IE dataset builder class 'Example' is derived from " - ", but the base builder is not which is not allowed. " - "The base builder is of type 'Conll2003' that is derived from " - ". Consider to derive your PyTorch-IE dataset builder " - "'Example' from a PyTorch-IE variant of 'GeneratorBasedBuilder'." 
- ), - ): - builder_cls(cache_dir=tmp_cache_dir) - - -def test_builder_with_document_converters_rename(): - @dataclass - class RenamedExampleDocument(TextBasedDocument): - spans: AnnotationList[LabeledSpan] = annotation_field(target="text") - - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "single_config")) - builder_cls: Type[PieDatasetBuilder] = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - builder = builder_cls( - cache_dir=tmp_cache_dir, - document_converters={ - RenamedExampleDocument: {"entities": "spans"}, - }, - ) - assert isinstance(builder, PieDatasetBuilder) - assert builder.document_converters == { - RenamedExampleDocument: {"entities": "spans"}, - } - - -@dataclass -class ExampleDocumentWithSimpleSpans(TextBasedDocument): - spans: AnnotationList[Span] = annotation_field(target="text") - - -def convert_example_document_to_example_document_with_simple_spans( - document: TextDocumentWithSpans, -) -> ExampleDocumentWithSimpleSpans: - result = ExampleDocumentWithSimpleSpans(text=document.text, spans=document.spans) - for entity in document.spans: - result.spans.append(Span(start=entity.start, end=entity.end)) - return result - - -def test_builder_with_document_converters_resolve_document_type_and_converter(): - @dataclass - class RenamedExampleDocument(TextBasedDocument): - spans: AnnotationList[LabeledSpan] = annotation_field(target="text") - - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "single_config")) - builder_cls: Type[PieDatasetBuilder] = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - builder = builder_cls( - cache_dir=tmp_cache_dir, - document_converters={ - "tests.data.test_builder.ExampleDocumentWithSimpleSpans": "tests.data.test_builder.convert_example_document_to_example_document_with_simple_spans", - }, - ) - assert isinstance(builder, PieDatasetBuilder) - assert builder.document_converters == { - ExampleDocumentWithSimpleSpans: convert_example_document_to_example_document_with_simple_spans, - } - - -class NoDocumentType: - pass - - -def test_builder_with_document_converters_resolve_wrong_document_type(): - dataset_module = dataset_module_factory(str(DATASETS_ROOT / "single_config")) - builder_cls: Type[PieDatasetBuilder] = import_main_class(dataset_module.module_path) - with tempfile.TemporaryDirectory() as tmp_cache_dir: - with pytest.raises( - TypeError, - match=re.escape( - "The key 'tests.data.test_builder.NoDocumentType' for one of the converters can not be resolved to a document type." 
- ), - ): - builder = builder_cls( - cache_dir=tmp_cache_dir, - document_converters={ - "tests.data.test_builder.NoDocumentType": convert_example_document_to_example_document_with_simple_spans, - }, - ) diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py deleted file mode 100644 index 7e2d89ee..00000000 --- a/tests/data/test_dataset.py +++ /dev/null @@ -1,463 +0,0 @@ -from collections.abc import Iterator, Sequence -from dataclasses import dataclass -from typing import Union - -import datasets -import numpy -import pytest -import torch - -from pytorch_ie import Dataset, IterableDataset -from pytorch_ie.annotations import BinaryRelation, Label, LabeledSpan, Span -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.core.taskmodule import ( - IterableTaskEncodingDataset, - TaskEncodingDataset, - TaskEncodingSequence, -) -from pytorch_ie.data.dataset import get_pie_dataset_type -from pytorch_ie.documents import TextDocument -from pytorch_ie.taskmodules import TransformerSpanClassificationTaskModule -from tests import _HF_CONLL2003_IS_AVAILABLE, DATASET_BUILDERS_ROOT -from tests.conftest import TestDocument - - -@pytest.fixture(scope="module") -def taskmodule(): - tokenizer_name_or_path = "bert-base-cased" - taskmodule = TransformerSpanClassificationTaskModule( - tokenizer_name_or_path=tokenizer_name_or_path, - entity_annotation="entities", - ) - return taskmodule - - -@pytest.fixture -def model_output(): - return { - "logits": torch.from_numpy( - numpy.log( - [ - # O, ORG, PER - [0.5, 0.2, 0.3], - [0.1, 0.1, 0.8], - [0.1, 0.5, 0.4], - [0.1, 0.4, 0.5], - [0.1, 0.6, 0.3], - ] - ) - ), - "start_indices": torch.tensor([1, 1, 7, 1, 6]), - "end_indices": torch.tensor([2, 4, 7, 4, 6]), - "batch_indices": torch.tensor([0, 1, 1, 2, 2]), - } - - -def test_dataset(maybe_iterable_dataset): - dataset = { - k: list(v) if isinstance(v, IterableDataset) else v - for k, v in maybe_iterable_dataset.items() - } - assert set(dataset.keys()) == {"train", "validation", "test"} - - assert len(dataset["train"]) == 8 - assert len(dataset["validation"]) == 2 - assert len(dataset["test"]) == 2 - - train_doc5 = dataset["train"][4] - assert train_doc5.id == "train_doc5" - assert len(train_doc5.sentences) == 3 - assert len(train_doc5.entities) == 3 - assert len(train_doc5.relations) == 3 - - assert str(train_doc5.sentences[1]) == "Entity G works at H." 
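The maybe_iterable_dataset fixture consumed by the test above came from the fixtures removed from tests/conftest.py in this change; the following is a minimal sketch of that pattern, with the JSON path as a placeholder and the import of example_to_doc_dict from the test conftest assumed purely for illustration.

import datasets

from pytorch_ie import Dataset
from tests.conftest import TestDocument, example_to_doc_dict

# load the raw JSON fixture with Huggingface datasets (placeholder path)
hf_dataset = datasets.load_dataset(
    path="json",
    field="data",
    data_files={"train": "path/to/train.json"},
)

# map raw examples to serialized document dicts, then wrap the split as a PIE Dataset
# so that indexing yields TestDocument instances instead of plain dicts
train = Dataset.from_hf_dataset(
    hf_dataset["train"].map(example_to_doc_dict), document_type=TestDocument
)
assert isinstance(train[0], TestDocument)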
- - -def test_dataset_index(dataset): - train_dataset = dataset["train"] - assert train_dataset[4].id == "train_doc5" - assert [doc.id for doc in train_dataset[0, 3, 5]] == ["train_doc1", "train_doc4", "train_doc6"] - assert [doc.id for doc in train_dataset[2:5]] == ["train_doc3", "train_doc4", "train_doc5"] - - -def test_dataset_map(maybe_iterable_dataset): - train_dataset = maybe_iterable_dataset["train"] - - def clear_relations(document): - document.relations.clear() - return document - - assert sum(len(doc.relations) for doc in train_dataset) == 7 - - mapped_dataset1 = train_dataset.map(clear_relations) - - assert sum(len(doc.relations) for doc in mapped_dataset1) == 0 - assert sum(len(doc.relations) for doc in train_dataset) == 7 - - -def test_dataset_map_batched(maybe_iterable_dataset): - train_dataset = maybe_iterable_dataset["train"] - - def clear_relations_batched(documents): - assert len(documents) == 2 - for document in documents: - document.relations.clear() - return documents - - assert sum(len(doc.relations) for doc in train_dataset) == 7 - - mapped_dataset1 = train_dataset.map(clear_relations_batched, batched=True, batch_size=2) - - assert sum(len(doc.relations) for doc in mapped_dataset1) == 0 - assert sum(len(doc.relations) for doc in train_dataset) == 7 - - -def test_dataset_map_with_result_document_type(maybe_iterable_dataset): - @dataclass - class TestDocument(TextDocument): - sentences: AnnotationList[Span] = annotation_field(target="text") - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - relations: AnnotationList[BinaryRelation] = annotation_field(target="entities") - - @dataclass - class TestDocumentWithTokensButNoRelations(TextDocument): - sentences: AnnotationList[Span] = annotation_field(target="text") - tokens: AnnotationList[Span] = annotation_field(target="text") - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - def clear_relations_and_add_one_token( - document: TestDocument, - ) -> TestDocumentWithTokensButNoRelations: - document.relations.clear() - # the conversion here is not really necessary, but to have correct typing - result = document.as_type(TestDocumentWithTokensButNoRelations) - # subtract 1 to create a Span different from the sentence to account for - # https://github.com/ChristophAlt/pytorch-ie/pull/222 - result.tokens.append(Span(0, len(document.text) - 1)) - return result - - train_dataset = maybe_iterable_dataset["train"] - - assert sum(len(doc.relations) for doc in train_dataset) == 7 - - mapped_dataset1 = train_dataset.map( - clear_relations_and_add_one_token, - result_document_type=TestDocumentWithTokensButNoRelations, - ) - - assert sum(len(doc.relations) for doc in train_dataset) == 7 - - doc0 = list(train_dataset)[0] - doc0_mapped = list(mapped_dataset1)[0] - assert len(doc0_mapped.tokens) == 1 - token = doc0_mapped.tokens[0] - assert token.start == 0 - assert token.end == len(doc0.text) - 1 - # check field names because isinstance does not work (the code of the document types - # is the same, but lives at different locations) - assert {f.name for f in doc0.fields()} == {f.name for f in TestDocument.fields()} - assert {f.name for f in doc0_mapped.fields()} == { - f.name for f in TestDocumentWithTokensButNoRelations.fields() - } - - -@pytest.mark.parametrize("encode_target", [False, True]) -@pytest.mark.parametrize("inplace", [False, True]) -@pytest.mark.parametrize("as_dataset", [False, True]) -def test_dataset_with_taskmodule( - maybe_iterable_dataset, taskmodule, model_output, 
encode_target, inplace, as_dataset -): - train_dataset = maybe_iterable_dataset["train"] - - taskmodule.prepare(train_dataset) - assert set(taskmodule.label_to_id.keys()) == {"PER", "ORG", "O"} - assert [taskmodule.id_to_label[i] for i in range(3)] == ["O", "ORG", "PER"] - assert taskmodule.label_to_id["O"] == 0 - - as_task_encoding_sequence = not encode_target - as_iterator = isinstance(train_dataset, (IterableDataset, Iterator)) - if as_task_encoding_sequence: - if as_iterator: - with pytest.raises( - ValueError, match="can not return a TaskEncodingSequence as Iterator" - ): - taskmodule.encode( - train_dataset, encode_target=encode_target, as_dataset=as_dataset - ) - return - if as_dataset: - with pytest.raises( - ValueError, match="can not return a TaskEncodingSequence as a dataset" - ): - taskmodule.encode( - train_dataset, encode_target=encode_target, as_dataset=as_dataset - ) - return - - task_encodings = taskmodule.encode( - train_dataset, encode_target=encode_target, as_dataset=as_dataset - ) - - if as_iterator: - if as_task_encoding_sequence: - raise NotImplementedError("this is not yet implemented") - if as_dataset: - assert isinstance(task_encodings, IterableTaskEncodingDataset) - else: - assert isinstance(task_encodings, Iterator) - else: - if as_dataset: - if as_task_encoding_sequence: - raise NotImplementedError("this is not yet implemented") - else: - assert isinstance(task_encodings, TaskEncodingDataset) - else: - if as_task_encoding_sequence: - assert isinstance(task_encodings, TaskEncodingSequence) - else: - assert isinstance(task_encodings, Sequence) - - task_encoding_list = list(task_encodings) - assert len(task_encoding_list) == 8 - task_encoding = task_encoding_list[5] - document = list(train_dataset)[5] - assert task_encoding.document == document - assert "input_ids" in task_encoding.inputs - assert ( - taskmodule.tokenizer.decode(task_encoding.inputs["input_ids"], skip_special_tokens=True) - == document.text - ) - - if encode_target: - assert task_encoding.targets == [ - (1, 4, taskmodule.label_to_id["PER"]), - (6, 6, taskmodule.label_to_id["ORG"]), - (9, 9, taskmodule.label_to_id["ORG"]), - ] - else: - assert not task_encoding.has_targets - - unbatched_outputs = taskmodule.unbatch_output(model_output) - - decoded_documents = taskmodule.decode( - task_encodings=task_encodings, - task_outputs=unbatched_outputs, - inplace=inplace, - ) - - if isinstance(train_dataset, Dataset): - assert len(decoded_documents) == len(train_dataset) - - assert {id(doc) for doc in decoded_documents}.isdisjoint({id(doc) for doc in train_dataset}) - - expected_scores = [0.8, 0.5, 0.5, 0.6] - i = 0 - for document in decoded_documents: - for entity_expected, entity_decoded in zip( - document["entities"], document["entities"].predictions - ): - assert entity_expected.start == entity_decoded.start - assert entity_expected.end == entity_decoded.end - assert entity_expected.label == entity_decoded.label - assert expected_scores[i] == pytest.approx(entity_decoded.score) - i += 1 - - for document in train_dataset: - assert not document["entities"].predictions - - -@pytest.mark.skipif( - not _HF_CONLL2003_IS_AVAILABLE, - reason="the Huggingface conll2003 dataset is not reachable and the local PIE-variant depends on it", -) -def test_load_with_hf_datasets(): - dataset_path = DATASET_BUILDERS_ROOT / "conll2003" - - dataset = datasets.load_dataset( - path=str(dataset_path), - ) - - assert set(dataset.keys()) == {"train", "validation", "test"} - - assert len(dataset["train"]) == 14041 - assert 
len(dataset["validation"]) == 3250 - assert len(dataset["test"]) == 3453 - - -@pytest.mark.skipif( - not _HF_CONLL2003_IS_AVAILABLE, - reason="the Huggingface conll2003 dataset is not reachable and the remote PIE-variant depends on it", -) -def test_load_with_hf_datasets_from_hub(): - dataset = datasets.load_dataset( - path="pie/conll2003", - ) - - assert set(dataset.keys()) == {"train", "validation", "test"} - - assert len(dataset["train"]) == 14041 - assert len(dataset["validation"]) == 3250 - assert len(dataset["test"]) == 3453 - - -def test_get_pie_dataset_type(json_dataset, iterable_json_dataset): - assert get_pie_dataset_type(json_dataset["train"]) == Dataset - assert get_pie_dataset_type(iterable_json_dataset["train"]) == IterableDataset - with pytest.raises(TypeError) as excinfo: - get_pie_dataset_type("not a dataset") - assert ( - str(excinfo.value) - == "the dataset must be of type Dataset or IterableDataset, but is of type " - ) - - -@dataclass -class TestDocumentWithLabel(TextDocument): - label: AnnotationList[Label] = annotation_field() - - -def convert_to_document_with_label(document: TestDocument) -> TestDocumentWithLabel: - result = TestDocumentWithLabel(text=document.text) - result.label.append(Label(label="label")) - return result - - -@pytest.fixture -def dataset_with_converter_functions(maybe_iterable_dataset) -> Union[Dataset, IterableDataset]: - train_dataset: Union[Dataset, IterableDataset] = maybe_iterable_dataset["train"] - assert len(train_dataset.document_converters) == 0 - - train_dataset.register_document_converter(convert_to_document_with_label) - return train_dataset - - -def test_register_document_converter_function(dataset_with_converter_functions): - - assert len(dataset_with_converter_functions.document_converters) == 1 - assert TestDocumentWithLabel in dataset_with_converter_functions.document_converters - assert ( - dataset_with_converter_functions.document_converters[TestDocumentWithLabel] - == convert_to_document_with_label - ) - - -@dataclass -class TestDocumentWithLabeledSpans(TextDocument): - spans: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -@pytest.fixture -def dataset_with_converter_mapping(maybe_iterable_dataset) -> Union[Dataset, IterableDataset]: - train_dataset: Union[Dataset, IterableDataset] = maybe_iterable_dataset["train"] - assert len(train_dataset.document_converters) == 0 - - field_mapping = {"entities": "spans"} - train_dataset.register_document_converter( - converter=field_mapping, document_type=TestDocumentWithLabeledSpans - ) - return train_dataset - - -def test_register_document_converter_mapping(dataset_with_converter_mapping): - assert len(dataset_with_converter_mapping.document_converters) == 1 - assert TestDocumentWithLabeledSpans in dataset_with_converter_mapping.document_converters - assert dataset_with_converter_mapping.document_converters[TestDocumentWithLabeledSpans] == { - "entities": "spans" - } - - -def test_to_document_type_function(dataset_with_converter_functions): - assert dataset_with_converter_functions.document_type == TestDocument - converted_dataset = dataset_with_converter_functions.to_document_type(TestDocumentWithLabel) - assert converted_dataset.document_type == TestDocumentWithLabel - - assert len(converted_dataset.document_converters) == 0 - for doc in converted_dataset: - assert isinstance(doc, TestDocumentWithLabel) - assert len(doc.label) == 1 - assert doc.label[0].label == "label" - - -def test_to_document_type_mapping(dataset_with_converter_mapping): - assert 
dataset_with_converter_mapping.document_type == TestDocument - converted_dataset = dataset_with_converter_mapping.to_document_type( - TestDocumentWithLabeledSpans - ) - assert converted_dataset.document_type == TestDocumentWithLabeledSpans - - assert len(converted_dataset.document_converters) == 0 - for doc_converted, doc in zip(converted_dataset, dataset_with_converter_mapping): - assert isinstance(doc, TestDocument) - assert isinstance(doc_converted, TestDocumentWithLabeledSpans) - assert "spans" in doc_converted - assert doc_converted.spans == doc.entities - original_annotation_field_names = {f.name for f in doc.annotation_fields()} - assert original_annotation_field_names == {"sentences", "entities", "relations"} - for annotation_field_name in original_annotation_field_names: - assert annotation_field_name not in doc_converted - - -def test_to_document_type_noop(maybe_iterable_dataset): - train_dataset: Union[Dataset, IterableDataset] = maybe_iterable_dataset["train"] - assert len(train_dataset.document_converters) == 0 - train_dataset.register_document_converter( - convert_to_document_with_label, document_type=TestDocument - ) - assert train_dataset.document_type == TestDocument - converted_dataset = train_dataset.to_document_type(TestDocument) - # the conversion should be a noop - assert converted_dataset.document_type == TestDocument - assert converted_dataset == train_dataset - assert len(converted_dataset.document_converters) == 1 - assert TestDocument in converted_dataset.document_converters - assert converted_dataset.document_converters[TestDocument] == convert_to_document_with_label - - -def test_to_document_type_convert_and_cast(dataset_with_converter_functions): - @dataclass - class TestDocumentWithLabelAndSpans(TestDocumentWithLabel): - label: AnnotationList[Label] = annotation_field() - spans: AnnotationList[Span] = annotation_field(target="text") - - assert dataset_with_converter_functions.document_type == TestDocument - # The only converter is registered for TestDocumentWithLabel, but we request a conversion to - # TestDocumentWithLabelAndSpans which is a *subclass* of TestDocumentWithLabel. This is a valid type - # and the conversion is performed by first converting to TestDocumentWithLabel and then casting - # to TestDocumentWithLabelAndSpans. - converted_dataset = dataset_with_converter_functions.to_document_type( - TestDocumentWithLabelAndSpans - ) - assert converted_dataset.document_type == TestDocumentWithLabelAndSpans - - assert len(converted_dataset.document_converters) == 0 - for converted_doc, doc in zip(converted_dataset, dataset_with_converter_functions): - assert isinstance(doc, TestDocument) - assert isinstance(converted_doc, TestDocumentWithLabelAndSpans) - assert converted_doc.text == doc.text - assert len(converted_doc.label) == 1 - assert converted_doc.label[0].label == "label" - assert len(converted_doc.spans) == 0 - - -def test_to_document_type_not_found(dataset_with_converter_functions): - assert dataset_with_converter_functions.document_type == TestDocument - - @dataclass - class TestDocumentWithSpans(TestDocument): - spans: AnnotationList[Span] = annotation_field(target="text") - - # The only converter is registered for TestDocumentWithLabel, but we request a conversion to - # TestDocumentWithSpans. This is not a valid type because it is neither a subclass nor a superclass of - # TestDocumentWithLabel, so an error is raised. 
- with pytest.raises(ValueError) as excinfo: - dataset_with_converter_functions.to_document_type(TestDocumentWithSpans) - assert ( - str(excinfo.value) - == "No valid key (either subclass or superclass) was found for the document type " - "'.TestDocumentWithSpans'>' " - "in the document_converters of the dataset. Available keys: " - "{}. Consider adding a respective converter " - "to the dataset with dataset.register_document_converter(my_converter_method) where " - "my_converter_method should accept as input and return " - "'.TestDocumentWithSpans'>'." - ) diff --git a/tests/data/test_dataset_casting.py b/tests/data/test_dataset_casting.py deleted file mode 100644 index 6fb991cc..00000000 --- a/tests/data/test_dataset_casting.py +++ /dev/null @@ -1,236 +0,0 @@ -import re -from dataclasses import dataclass - -import pytest - -from pytorch_ie import Dataset, IterableDataset -from pytorch_ie.annotations import LabeledSpan, Span -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextDocument - - -@dataclass -class CoNLL2002Document(TextDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -@dataclass -class DocumentWithParts(TextDocument): - parts: AnnotationList[Span] = annotation_field(target="text") - - -@dataclass -class CoNLL2002WithPartsDocument(CoNLL2002Document, DocumentWithParts): - pass - - -@dataclass -class DocumentWithEnts(TextDocument): - ents: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -@dataclass -class DocumentWithEntsWrongType(TextDocument): - ents: AnnotationList[Span] = annotation_field(target="text") - - -@dataclass -class DocumentWithEntsAndParts(DocumentWithParts, DocumentWithEnts): - pass - - -@dataclass -class DocumentWithPartsAndEntitiesSwapped(TextDocument): - parts: AnnotationList[LabeledSpan] = annotation_field(target="text") - entities: AnnotationList[Span] = annotation_field(target="text") - - -@pytest.fixture() -def dataset_train(maybe_iterable_dataset): - return maybe_iterable_dataset["train"].cast_document_type( - CoNLL2002Document, remove_columns=True - ) - - -def _add_full_part(doc: DocumentWithParts) -> DocumentWithParts: - doc.parts.append(Span(start=0, end=len(doc.text))) - return doc - - -def _get_doc(ds): - # use the second document since it has entities - IDX = 2 - if isinstance(ds, Dataset): - return ds[IDX] - elif isinstance(ds, IterableDataset): - it = iter(ds) - doc = None - for i in range(IDX + 1): - doc = next(it) - return doc - - -def test_cast_document_type(dataset_train): - casted = dataset_train.cast_document_type(CoNLL2002WithPartsDocument) - doc0_orig = _get_doc(dataset_train) - with_parts = casted.map(lambda doc: _add_full_part(doc)) - assert "entities" in with_parts.column_names - assert "parts" in with_parts.column_names - doc0 = _get_doc(with_parts) - assert set(doc0) == {"entities", "parts"} - assert doc0.entities == doc0_orig.entities - - part0 = doc0.parts[0] - assert isinstance(part0, Span) - assert part0.start == 0 - assert part0.end == len(doc0.text) - - -def test_cast_document_type_remove_field(dataset_train): - doc0_orig = _get_doc(dataset_train) - casted = dataset_train.cast_document_type(DocumentWithParts, remove_columns=True) - with_partitions = casted.map(lambda doc: _add_full_part(doc)) - assert "entities" not in with_partitions.column_names - assert "parts" in with_partitions.column_names - doc0 = _get_doc(with_partitions) - assert set(doc0) == {"parts"} - - part0 = doc0.parts[0] - assert isinstance(part0, Span) - assert 
part0.start == 0 - assert part0.end == len(doc0.text) - - casted_back = with_partitions.cast_document_type(CoNLL2002Document) - assert "entities" in casted_back.column_names - # original entities are not available anymore after casting back - assert len(doc0_orig.entities) > 0 - assert len(list(casted_back)[0].entities) == 0 - - -def test_cast_document_type_recover_field(dataset_train): - doc_orig = _get_doc(dataset_train) - casted = dataset_train.cast_document_type(DocumentWithParts) - # "entities" stay in the arrow table because remove_columns=False per default - assert "entities" in casted.column_names - assert "parts" in casted.column_names - - doc_casted = _get_doc(casted) - assert set(doc_casted) == {"parts"} - - casted_back = casted.cast_document_type(CoNLL2002Document) - assert "entities" in casted_back.column_names - # original entities are recovered after casting back - doc_back = _get_doc(casted_back) - assert len(doc_back.entities) > 0 - assert doc_back.entities == doc_orig.entities - - -def test_cast_document_type_recover_field_with_mapping(dataset_train): - doc_orig = _get_doc(dataset_train) - casted = dataset_train.cast_document_type(DocumentWithParts) - # "entities" stay in the arrow table because remove_columns=False per default - assert "entities" in casted.column_names - assert "parts" in casted.column_names - - doc_casted = _get_doc(casted) - assert set(doc_casted) == {"parts"} - - casted_back = casted.cast_document_type( - DocumentWithEntsAndParts, field_mapping={"entities": "ents"} - ) - assert "ents" in casted_back.column_names - # original entities are recovered after casting back - doc_back = _get_doc(casted_back) - assert len(doc_back.ents) > 0 - assert doc_back.ents == doc_orig.entities - - -def test_cast_document_type_recover_field_wrong(dataset_train): - casted = dataset_train.cast_document_type(DocumentWithEntsAndParts) - # "entities" stay in the arrow table because remove_columns=False per default - assert "entities" in casted.column_names - assert "parts" in casted.column_names - assert "ents" in casted.column_names - - doc_casted = _get_doc(casted) - assert set(doc_casted) == {"parts", "ents"} - - with pytest.raises( - ValueError, - match=re.escape( - "rename targets are already in column names: {'entities'}. Did you miss to set remove_columns=True in a previous call of cast_document_type?" 
- ), - ): - casted.cast_document_type(CoNLL2002Document, field_mapping={"ents": "entities"}) - - -def test_cast_document_type_rename_field(dataset_train): - doc0_orig = _get_doc(dataset_train) - casted = dataset_train.cast_document_type( - DocumentWithEntsAndParts, field_mapping={"entities": "ents"} - ) - with_parts = casted.map(lambda doc: _add_full_part(doc)) - assert "ents" in with_parts.column_names - assert "parts" in with_parts.column_names - doc0 = _get_doc(with_parts) - assert set(doc0) == {"ents", "parts"} - assert doc0.ents == doc0_orig.entities - - part0 = doc0.parts[0] - assert isinstance(part0, Span) - assert part0.start == 0 - assert part0.end == len(doc0.text) - - -def test_cast_document_type_swap_fields(dataset_train): - if isinstance(dataset_train, IterableDataset): - # TODO: for now, this would fail because datasets.IterableDataset.rename_columns() is too restrictive - # (does not allow swapping) - return - - # just add "parts" to have another field to swap "entities" with - casted = dataset_train.cast_document_type(CoNLL2002WithPartsDocument) - with_parts = casted.map(lambda doc: _add_full_part(doc)) - doc_with_parts = _get_doc(with_parts) - - swapped = with_parts.cast_document_type( - DocumentWithPartsAndEntitiesSwapped, - field_mapping={"entities": "parts", "parts": "entities"}, - ) - assert "entities" in swapped.column_names - assert "parts" in swapped.column_names - doc_swapped = _get_doc(swapped) - assert set(doc_swapped) == {"entities", "parts"} - assert doc_swapped.parts == doc_with_parts.entities - assert doc_swapped.entities == doc_with_parts.parts - - -def test_cast_document_type_rename_source_not_available(dataset_train): - with pytest.raises( - ValueError, - match=re.escape( - "some fields to rename are not in the original document_type or hidden fields: {'not_in_original_document'}" - ), - ): - dataset_train.cast_document_type( - DocumentWithEntsWrongType, field_mapping={"not_in_original_document": "ents"} - ) - - -def test_cast_document_type_rename_target_not_available(dataset_train): - with pytest.raises( - ValueError, - match=re.escape( - "some renamed fields are not in the new document_type: {'not_in_new_document'}" - ), - ): - dataset_train.cast_document_type( - DocumentWithEntsWrongType, field_mapping={"entities": "not_in_new_document"} - ) - - -def test_cast_document_type_rename_wrong_type(dataset_train): - with pytest.raises(ValueError, match=re.escape("new field is not the same as old field:")): - dataset_train.cast_document_type( - DocumentWithEntsWrongType, field_mapping={"entities": "ents"} - ) diff --git a/tests/data/test_dataset_common.py b/tests/data/test_dataset_common.py deleted file mode 100644 index 034d64d9..00000000 --- a/tests/data/test_dataset_common.py +++ /dev/null @@ -1,121 +0,0 @@ -import os -import tempfile - -import pytest -from absl.testing import parameterized -from datasets.builder import BuilderConfig, DatasetBuilder -from datasets.download.download_manager import DownloadMode -from datasets.load import dataset_module_factory, import_main_class, load_dataset -from datasets.utils.file_utils import DownloadConfig - -from tests import DATASET_BUILDERS_ROOT -from tests.data.dataset_tester import DatasetTester - - -def test_datasets_dir_and_script_names(): - for dataset_dir in DATASET_BUILDERS_ROOT.iterdir(): - name = dataset_dir.name - if ( - not name.startswith("__") and len(os.listdir(dataset_dir)) > 0 - ): # ignore __pycache__ and empty dirs - # check that the script name is the same as the dir name - assert os.path.exists( - 
os.path.join(dataset_dir, name + ".py") - ), f"Bad structure for dataset '{name}'. Please check that the directory name is a valid dataset and that it is the same as the dataset script name." - - # if name in _PACKAGED_DATASETS_MODULES: - # continue - # else: - # # check that the script name is the same as the dir name - # assert os.path.exists( - # os.path.join(dataset_dir, name + ".py") - # ), f"Bad structure for dataset '{name}'. Please check that the directory name is a valid dataset and that it is the same as the dataset script name." - - -def get_local_dataset_names(): - dataset_script_files = list(DATASET_BUILDERS_ROOT.absolute().glob("**/*.py")) - datasets = [ - dataset_script_file.parent.name - for dataset_script_file in dataset_script_files - if dataset_script_file.name != "__init__.py" - ] - return [{"testcase_name": x, "dataset_name": x} for x in datasets] - - -@parameterized.named_parameters(get_local_dataset_names()) -# @for_all_test_methods(skip_if_dataset_requires_faiss, skip_if_not_compatible_with_windows) -class LocalDatasetTest(parameterized.TestCase): - dataset_name = None - - def setUp(self): - self.dataset_tester = DatasetTester(self) - - def test_load_dataset(self, dataset_name): - configs = self.dataset_tester.load_all_configs(dataset_name, is_local=True)[:1] - self.dataset_tester.check_load_dataset( - dataset_name, configs, is_local=True, use_local_dummy_data=True - ) - - def test_builder_class(self, dataset_name): - builder_cls = self.dataset_tester.load_builder_class(dataset_name, is_local=True) - name = builder_cls.BUILDER_CONFIGS[0].name if builder_cls.BUILDER_CONFIGS else None - with tempfile.TemporaryDirectory() as tmp_cache_dir: - builder = builder_cls(config_name=name, cache_dir=tmp_cache_dir) - self.assertIsInstance(builder, DatasetBuilder) - - def test_builder_configs(self, dataset_name): - builder_configs = self.dataset_tester.load_all_configs(dataset_name, is_local=True) - self.assertTrue(len(builder_configs) > 0) - - if builder_configs[0] is not None: - all(self.assertIsInstance(config, BuilderConfig) for config in builder_configs) - - @pytest.mark.slow - def test_load_dataset_all_configs(self, dataset_name): - configs = self.dataset_tester.load_all_configs(dataset_name, is_local=True) - self.dataset_tester.check_load_dataset( - dataset_name, configs, is_local=True, use_local_dummy_data=True - ) - - @pytest.mark.slow - def test_load_real_dataset(self, dataset_name): - path = str(DATASET_BUILDERS_ROOT / dataset_name) - dataset_module = dataset_module_factory( - path, download_config=DownloadConfig(local_files_only=True) - ) - builder_cls = import_main_class(dataset_module.module_path) - name = builder_cls.BUILDER_CONFIGS[0].name if builder_cls.BUILDER_CONFIGS else None - with tempfile.TemporaryDirectory() as temp_cache_dir: - dataset = load_dataset( - path, - name=name, - cache_dir=temp_cache_dir, - download_mode=DownloadMode.FORCE_REDOWNLOAD, - ) - for split in dataset.keys(): - self.assertTrue(len(dataset[split]) > 0) - del dataset - - @pytest.mark.slow - def test_load_real_dataset_all_configs(self, dataset_name): - path = str(DATASET_BUILDERS_ROOT / dataset_name) - dataset_module = dataset_module_factory( - path, download_config=DownloadConfig(local_files_only=True) - ) - builder_cls = import_main_class(dataset_module.module_path) - config_names = ( - [config.name for config in builder_cls.BUILDER_CONFIGS] - if len(builder_cls.BUILDER_CONFIGS) > 0 - else [None] - ) - for name in config_names: - with tempfile.TemporaryDirectory() as
temp_cache_dir: - dataset = load_dataset( - path, - name=name, - cache_dir=temp_cache_dir, - download_mode=DownloadMode.FORCE_REDOWNLOAD, - ) - for split in dataset.keys(): - self.assertTrue(len(dataset[split]) > 0) - del dataset diff --git a/tests/data/test_dataset_dict.py b/tests/data/test_dataset_dict.py deleted file mode 100644 index fbd9b268..00000000 --- a/tests/data/test_dataset_dict.py +++ /dev/null @@ -1,541 +0,0 @@ -import logging -from dataclasses import dataclass -from pathlib import Path -from typing import Dict, Iterable, Optional, Union - -import datasets -import pytest - -from pytorch_ie import Dataset, DatasetDict, IterableDataset -from pytorch_ie.annotations import Label, LabeledSpan -from pytorch_ie.core import AnnotationList, Document, annotation_field -from pytorch_ie.data.common import ( - EnterDatasetDictMixin, - EnterDatasetMixin, - ExitDatasetDictMixin, - ExitDatasetMixin, -) -from pytorch_ie.documents import TextBasedDocument, TextDocument -from tests import FIXTURES_ROOT -from tests.conftest import TestDocument - -logger = logging.getLogger(__name__) - -DATA_PATH = FIXTURES_ROOT / "dataset_dict" / "conll2003_extract" - - -@pytest.mark.skip(reason="don't create fixture data again") -def test_create_fixture_data(): - conll2003 = DatasetDict(datasets.load_dataset("pie/conll2003")) - for split in list(conll2003): - # restrict all splits to 3 examples - conll2003 = conll2003.select(split=split, stop=3) - conll2003.to_json(DATA_PATH) - - -@dataclass -class DocumentWithEntitiesAndRelations(TextBasedDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -@pytest.fixture(scope="module") -def dataset_dict(): - return DatasetDict.from_json( - data_dir=DATA_PATH, document_type=DocumentWithEntitiesAndRelations - ) - - -def test_from_json(dataset_dict): - assert set(dataset_dict) == {"train", "test", "validation"} - assert len(dataset_dict["train"]) == 3 - assert len(dataset_dict["test"]) == 3 - assert len(dataset_dict["validation"]) == 3 - - -def test_from_json_no_serialized_document_type(dataset_dict): - with pytest.raises(ValueError) as excinfo: - DatasetDict.from_json(data_dir=DATA_PATH) - assert ( - str(excinfo.value) - == "document_type must be provided if it cannot be loaded from the metadata file" - ) - - -def test_load_dataset(): - dataset_dict = DatasetDict.load_dataset( - "pie/brat", base_dataset_kwargs=dict(data_dir=FIXTURES_ROOT / "datasets" / "brat") - ) - assert isinstance(dataset_dict, DatasetDict) - assert set(dataset_dict) == {"train"} - assert isinstance(dataset_dict["train"], Dataset) - assert len(dataset_dict["train"]) == 2 - assert all(isinstance(doc, Document) for doc in dataset_dict["train"]) - - -@pytest.fixture(scope="module") -def iterable_dataset_dict(): - return DatasetDict.from_json( - data_dir=DATA_PATH, - document_type=DocumentWithEntitiesAndRelations, - streaming=True, - ) - - -def test_iterable_dataset_dict(iterable_dataset_dict): - assert set(iterable_dataset_dict) == {"train", "test", "validation"} - - -def test_to_json_and_back(dataset_dict, tmp_path): - path = Path(tmp_path) / "dataset_dict" - dataset_dict.to_json(path) - dataset_dict_from_json = DatasetDict.from_json( - data_dir=path, - document_type=dataset_dict.document_type, - ) - assert set(dataset_dict_from_json) == set(dataset_dict) - for split in dataset_dict: - assert len(dataset_dict_from_json[split]) == len(dataset_dict[split]) - for doc1, doc2 in zip(dataset_dict_from_json[split], dataset_dict[split]): - assert doc1 == doc2 - - -def 
test_to_json_and_back_serialize_document_type(dataset_dict, tmp_path): - path = Path(tmp_path) / "dataset_dict" - dataset_dict.to_json(path) - dataset_dict_from_json = DatasetDict.from_json( - data_dir=path, - ) - assert set(dataset_dict_from_json) == set(dataset_dict) - for split in dataset_dict: - assert len(dataset_dict_from_json[split]) == len(dataset_dict[split]) - for doc1, doc2 in zip(dataset_dict_from_json[split], dataset_dict[split]): - assert doc1 == doc2 - - -def test_document_type_empty_no_splits(): - with pytest.raises(ValueError) as excinfo: - DatasetDict().document_type - assert ( - str(excinfo.value) == "dataset does not contain any splits, cannot determine document type" - ) - - -def test_document_type_different_types(dataset_dict): - # load the example dataset as a different document type - dataset_dict_different_type = DatasetDict.from_json( - data_dir=DATA_PATH, - document_type=TextBasedDocument, - ) - assert dataset_dict_different_type.document_type is TextBasedDocument - # create a dataset dict with different document types for train and test splits - dataset_dict_different_types = DatasetDict( - { - "train": dataset_dict["train"], - "test": dataset_dict_different_type["test"], - } - ) - # accessing the document type should raise an error with the message that starts with - # "dataset contains splits with different document types:" - with pytest.raises(ValueError) as excinfo: - dataset_dict_different_types.document_type - assert str(excinfo.value).startswith("dataset contains splits with different document types:") - - -def test_dataset_type(dataset_dict): - assert dataset_dict.dataset_type is Dataset - - -def test_dataset_type_no_splits(): - with pytest.raises(ValueError) as excinfo: - DatasetDict().dataset_type - assert ( - str(excinfo.value) - == "dataset does not contain any splits, cannot determine the dataset type" - ) - - -def test_dataset_type_different_type(dataset_dict, iterable_dataset_dict): - dataset_dict_different_type = DatasetDict( - { - "train": dataset_dict["train"], - "test": iterable_dataset_dict["test"], - } - ) - with pytest.raises(ValueError) as excinfo: - dataset_dict_different_type.dataset_type - assert str(excinfo.value).startswith("dataset contains splits with different dataset types:") - - -def map_fn(doc): - doc.text = doc.text.upper() - return doc - - -@pytest.mark.parametrize( - "function", - [map_fn, "tests.data.test_dataset_dict.map_fn"], -) -def test_map(dataset_dict, function): - dataset_dict_mapped = dataset_dict.map(function) - for split in dataset_dict: - assert len(dataset_dict_mapped[split]) == len(dataset_dict[split]) - for doc1, doc2 in zip(dataset_dict_mapped[split], dataset_dict[split]): - assert doc1.text == doc2.text.upper() - - -def test_map_noop(dataset_dict): - dataset_dict_mapped = dataset_dict.map() - for split in dataset_dict: - assert len(dataset_dict_mapped[split]) == len(dataset_dict[split]) - for doc1, doc2 in zip(dataset_dict_mapped[split], dataset_dict[split]): - assert doc1 == doc2 - - -def test_map_with_result_document_type(dataset_dict): - dataset_dict_mapped = dataset_dict.map(result_document_type=TextBasedDocument) - for split in dataset_dict: - assert len(dataset_dict_mapped[split]) == len(dataset_dict[split]) - for doc1, doc2 in zip(dataset_dict_mapped[split], dataset_dict[split]): - assert isinstance(doc1, TextBasedDocument) - assert isinstance(doc2, DocumentWithEntitiesAndRelations) - assert doc1.text == doc2.text - - -def test_map_with_context_manager(dataset_dict): - class DocumentCounter( - 
EnterDatasetMixin, ExitDatasetMixin, EnterDatasetDictMixin, ExitDatasetDictMixin - ): - def reset_statistics(self): - self.number = 0 - - def __call__(self, doc): - self.number += 1 - return doc - - def enter_dataset( - self, dataset: Union[Dataset, IterableDataset], name: Optional[str] = None - ) -> None: - self.reset_statistics() - self.split = name - - def exit_dataset( - self, dataset: Union[Dataset, IterableDataset], name: Optional[str] = None - ) -> None: - self.all_docs[self.split] = self.number - - def enter_dataset_dict(self, dataset_dict: DatasetDict) -> None: - self.all_docs: Dict[Optional[str], int] = {} - self.split = None - - def exit_dataset_dict(self, dataset_dict: DatasetDict) -> None: - logger.info(f"Number of documents per split: {self.all_docs}") - - document_counter = DocumentCounter() - # note that we need to disable caching here, otherwise the __call__ method may not be called for any dataset split - dataset_dict_mapped = dataset_dict.map(function=document_counter, load_from_cache_file=False) - assert document_counter.all_docs == {"train": 3, "test": 3, "validation": 3} - - # the document_counter should not have modified the dataset - assert set(dataset_dict_mapped) == set(dataset_dict) - for split in dataset_dict: - assert len(dataset_dict_mapped[split]) == len(dataset_dict[split]) - for doc1, doc2 in zip(dataset_dict_mapped[split], dataset_dict[split]): - assert doc1 == doc2 - - -def test_select(dataset_dict): - # select documents by index - dataset_dict_selected = dataset_dict.select( - split="train", - indices=[0, 2], - ) - assert len(dataset_dict_selected["train"]) == 2 - assert dataset_dict_selected["train"][0] == dataset_dict["train"][0] - assert dataset_dict_selected["train"][1] == dataset_dict["train"][2] - - # select documents by range - dataset_dict_selected = dataset_dict.select( - split="train", - stop=2, - start=1, - step=1, - ) - assert len(dataset_dict_selected["train"]) == 1 - assert dataset_dict_selected["train"][0] == dataset_dict["train"][1] - - # calling with no arguments that result in the creation of indices should return the same dataset, - # but will log a warning if other arguments (here "any_arg") are passed - dataset_dict_selected = dataset_dict.select(split="train", any_arg="ignored") - assert len(dataset_dict_selected["train"]) == len(dataset_dict["train"]) - assert dataset_dict_selected["train"][0] == dataset_dict["train"][0] - assert dataset_dict_selected["train"][1] == dataset_dict["train"][1] - assert dataset_dict_selected["train"][2] == dataset_dict["train"][2] - - -def test_rename_splits(dataset_dict): - mapping = { - "train": "train_renamed", - "test": "test_renamed", - "validation": "validation_renamed", - } - dataset_dict_renamed = dataset_dict.rename_splits(mapping) - assert set(dataset_dict_renamed) == set(mapping.values()) - for split in dataset_dict: - split_renamed = mapping[split] - assert len(dataset_dict_renamed[split_renamed]) == len(dataset_dict[split]) - for doc1, doc2 in zip(dataset_dict_renamed[split_renamed], dataset_dict[split]): - assert doc1 == doc2 - - -def test_rename_split_noop(dataset_dict): - dataset_dict_renamed = dataset_dict.rename_splits() - assert set(dataset_dict_renamed) == set(dataset_dict) - for split in dataset_dict: - assert len(dataset_dict_renamed[split]) == len(dataset_dict[split]) - for doc1, doc2 in zip(dataset_dict_renamed[split], dataset_dict[split]): - assert doc1 == doc2 - - -def assert_doc_lists_equal(docs: Iterable[Document], other_docs: Iterable[Document]): - assert all(doc1
== doc2 for doc1, doc2 in zip(docs, other_docs)) - - -def test_add_test_split(dataset_dict): - dataset_dict_with_test = dataset_dict.add_test_split( - source_split="test", target_split="new_test", test_size=1, shuffle=False - ) - assert "new_test" in dataset_dict_with_test - assert len(dataset_dict_with_test["new_test"]) + len(dataset_dict_with_test["test"]) == len( - dataset_dict["test"] - ) - assert len(dataset_dict_with_test["new_test"]) == 1 - assert len(dataset_dict_with_test["test"]) == 2 - assert_doc_lists_equal(dataset_dict_with_test["new_test"], dataset_dict["test"][2:]) - assert_doc_lists_equal(dataset_dict_with_test["test"], dataset_dict["test"][:2]) - test_ids = [doc.id for doc in dataset_dict_with_test["test"]] - new_test_ids = [doc.id for doc in dataset_dict_with_test["new_test"]] - assert set(test_ids).intersection(set(new_test_ids)) == set() - - # remaining splits should be unchanged - assert len(dataset_dict_with_test["train"]) == len(dataset_dict["train"]) - assert len(dataset_dict_with_test["validation"]) == len(dataset_dict["validation"]) - assert_doc_lists_equal(dataset_dict_with_test["train"], dataset_dict["train"]) - assert_doc_lists_equal(dataset_dict_with_test["validation"], dataset_dict["validation"]) - - -def test_drop_splits(dataset_dict): - dataset_dict_dropped = dataset_dict.drop_splits(["train", "validation"]) - assert set(dataset_dict_dropped) == {"test"} - assert len(dataset_dict_dropped["test"]) == len(dataset_dict["test"]) - assert_doc_lists_equal(dataset_dict_dropped["test"], dataset_dict["test"]) - - -def test_concat_splits(dataset_dict): - dataset_dict_concatenated = dataset_dict.concat_splits(["train", "validation"], target="train") - assert set(dataset_dict_concatenated) == {"test", "train"} - assert len(dataset_dict_concatenated["train"]) == len(dataset_dict["train"]) + len( - dataset_dict["validation"] - ) - assert_doc_lists_equal( - dataset_dict_concatenated["train"], - list(dataset_dict["train"]) + list(dataset_dict["validation"]), - ) - - -def test_concat_splits_no_splits(dataset_dict): - with pytest.raises(ValueError) as excinfo: - dataset_dict.concat_splits(splits=[], target="train") - assert str(excinfo.value) == "please provide at least one split to concatenate" - - -def test_concat_splits_different_dataset_types(dataset_dict, iterable_dataset_dict): - dataset_dict_to_concat = DatasetDict( - { - "train": dataset_dict["train"], - "validation": iterable_dataset_dict["validation"], - } - ) - with pytest.raises(ValueError) as excinfo: - dataset_dict_to_concat.concat_splits(splits=["train", "validation"], target="train") - assert str(excinfo.value).startswith("dataset contains splits with different dataset types:") - - -def test_filter(dataset_dict): - dataset_dict_filtered = dataset_dict.filter( - function=lambda doc: len(doc["text"]) > 15, - split="train", - ) - assert all(len(doc.text) > 15 for doc in dataset_dict_filtered["train"]) - assert len(dataset_dict["train"]) == 3 - assert len(dataset_dict_filtered["train"]) == 2 - assert dataset_dict_filtered["train"][0] == dataset_dict["train"][0] - assert dataset_dict_filtered["train"][1] == dataset_dict["train"][2] - - # remaining splits should be unchanged - assert len(dataset_dict_filtered["validation"]) == len(dataset_dict["validation"]) == 3 - assert len(dataset_dict_filtered["test"]) == len(dataset_dict["test"]) == 3 - assert_doc_lists_equal(dataset_dict_filtered["validation"], dataset_dict["validation"]) - assert_doc_lists_equal(dataset_dict_filtered["test"], dataset_dict["test"]) - - -def 
test_filter_iterable(iterable_dataset_dict): - dataset_dict_filtered = iterable_dataset_dict.filter( - function=lambda doc: len(doc["text"]) > 15, - split="train", - ) - docs_train = list(dataset_dict_filtered["train"]) - assert len(docs_train) == 2 - assert all(len(doc.text) > 15 for doc in docs_train) - - -def test_filter_unknown_dataset_type(): - dataset_dict = DatasetDict({"train": "foo"}) - with pytest.raises(TypeError) as excinfo: - dataset_dict.filter(function=lambda doc: True, split="train") - assert str(excinfo.value) == "dataset must be of type Dataset, but is " - - -def test_filter_noop(dataset_dict): - # passing no filter function should be a noop - dataset_dict_filtered = dataset_dict.filter(split="train") - assert len(dataset_dict_filtered["train"]) == len(dataset_dict["train"]) == 3 - assert len(dataset_dict_filtered["validation"]) == len(dataset_dict["validation"]) == 3 - assert len(dataset_dict_filtered["test"]) == len(dataset_dict["test"]) == 3 - assert_doc_lists_equal(dataset_dict_filtered["train"], dataset_dict["train"]) - assert_doc_lists_equal(dataset_dict_filtered["validation"], dataset_dict["validation"]) - assert_doc_lists_equal(dataset_dict_filtered["test"], dataset_dict["test"]) - - -@pytest.mark.parametrize( - # we can either provide ids or a filter function - "ids,filter_function", - [ - (["1", "2"], None), - (None, lambda doc: doc["id"] in ["1", "2"]), - ], -) -def test_move_to_new_split(dataset_dict, ids, filter_function): - # move the second and third document from train to new_validation - dataset_dict_moved = dataset_dict.move_to_new_split( - ids=ids, - filter_function=filter_function, - source_split="train", - target_split="new_validation", - ) - assert len(dataset_dict_moved["train"]) == 1 - assert len(dataset_dict_moved["new_validation"]) == 2 - assert_doc_lists_equal(dataset_dict_moved["train"], dataset_dict["train"][:1]) - - # the remaining splits should be unchanged - assert len(dataset_dict_moved["validation"]) == len(dataset_dict["validation"]) == 3 - assert len(dataset_dict_moved["test"]) == len(dataset_dict["test"]) == 3 - assert_doc_lists_equal(dataset_dict_moved["validation"], dataset_dict["validation"]) - assert_doc_lists_equal(dataset_dict_moved["test"], dataset_dict["test"]) - - -def test_move_to_new_split_missing_arguments(dataset_dict): - with pytest.raises(ValueError) as excinfo: - dataset_dict.move_to_new_split( - ids=None, - filter_function=None, - source_split="train", - target_split="new_validation", - ) - assert str(excinfo.value) == "please provide either a list of ids or a filter function" - - -def test_cast_document_type(dataset_dict): - dataset_dict_cast = dataset_dict.cast_document_type(TextBasedDocument) - assert dataset_dict_cast.document_type == TextBasedDocument - for split in dataset_dict_cast: - assert all(isinstance(doc, TextBasedDocument) for doc in dataset_dict_cast[split]) - - -@dataclass -class TestDocumentWithLabel(TextDocument): - label: AnnotationList[Label] = annotation_field() - - -def convert_to_document_with_label(document: TestDocument) -> TestDocumentWithLabel: - result = TestDocumentWithLabel(text=document.text) - result.label.append(Label(label="label")) - return result - - -def test_register_document_converter(dataset_dict): - - dataset_dict.register_document_converter( - convert_to_document_with_label, document_type=TestDocumentWithLabel - ) - - for name, split in dataset_dict.items(): - assert split.document_converters[TestDocumentWithLabel] == convert_to_document_with_label - - -def 
test_register_document_converter_resolve(dataset_dict): - - dataset_dict.register_document_converter( - "tests.data.test_dataset_dict.convert_to_document_with_label", - document_type="tests.data.test_dataset_dict.TestDocumentWithLabel", - ) - - for name, split in dataset_dict.items(): - assert split.document_converters[TestDocumentWithLabel] == convert_to_document_with_label - - -class NoDocument: - pass - - -def test_register_document_converter_resolve_wrong_document_type(dataset_dict): - - with pytest.raises(TypeError) as excinfo: - dataset_dict.register_document_converter( - convert_to_document_with_label, document_type="tests.data.test_dataset_dict.NoDocument" - ) - assert ( - str(excinfo.value) - == "document_type must be or resolve to a subclass of Document, but is 'tests.data.test_dataset_dict.NoDocument'" - ) - - -def test_register_document_converter_resolve_wrong_converter(dataset_dict): - - with pytest.raises(TypeError) as excinfo: - dataset_dict.register_document_converter([1, 2, 3], document_type=TestDocumentWithLabel) - assert str(excinfo.value) == "converter must be a callable or a dict, but is " - - -def test_to_document_type(dataset_dict): - dataset_dict.register_document_converter(convert_to_document_with_label) - dataset_dict_converted = dataset_dict.to_document_type(TestDocumentWithLabel) - assert dataset_dict_converted.document_type == TestDocumentWithLabel - for split in dataset_dict_converted.values(): - assert all(isinstance(doc, TestDocumentWithLabel) for doc in split) - - -def test_to_document_resolve(dataset_dict): - dataset_dict.register_document_converter(convert_to_document_with_label) - dataset_dict_converted = dataset_dict.to_document_type( - "tests.data.test_dataset_dict.TestDocumentWithLabel" - ) - assert dataset_dict_converted.document_type == TestDocumentWithLabel - for split in dataset_dict_converted.values(): - assert all(isinstance(doc, TestDocumentWithLabel) for doc in split) - - -def test_to_document_type_resolve_wrong_document_type(dataset_dict): - dataset_dict.register_document_converter(convert_to_document_with_label) - with pytest.raises(TypeError) as excinfo: - dataset_dict.to_document_type("tests.data.test_dataset_dict.NoDocument") - assert ( - str(excinfo.value) - == "document_type must be a document type or a string that can be resolved to such a type, but got tests.data.test_dataset_dict.NoDocument." - ) - - -def test_to_document_type_noop(dataset_dict): - assert dataset_dict.document_type == DocumentWithEntitiesAndRelations - dataset_dict_converted = dataset_dict.to_document_type(DocumentWithEntitiesAndRelations) - assert dataset_dict_converted.document_type == DocumentWithEntitiesAndRelations - assert dataset_dict_converted == dataset_dict diff --git a/tests/data/test_dataset_scripts.py b/tests/data/test_dataset_scripts.py deleted file mode 100644 index 38445894..00000000 --- a/tests/data/test_dataset_scripts.py +++ /dev/null @@ -1,66 +0,0 @@ -import re -from pathlib import Path -from unittest import TestCase - -from tests import DATASET_BUILDERS_ROOT - - -# taken from https://github.com/huggingface/datasets/blob/master/tests/test_dataset_scripts.py -class TestDatasetScripts(TestCase): - def _no_encoding_on_file_open(self, filepath: str): - r"""Find all instances where a non-binary file is opened without UTF-8 encoding. - This function uses regular expressions to find instances where Python's `open()` function is used to open - non-binary files. 
See below for an explanation of the regular expression: - (?!.*\b(?:encoding|rb|w|wb|w+|wb+|ab|ab+)\b): Lookahead and discard match if `encoding` or `rb` etc. are - arguments of `open()`. - (?<=\s): Lookbehind and match if `open()` is preceded by one whitespace. - (open)\((.*)\): Capture everything in parentheses of `open()`. - """ - - with open(filepath, encoding="utf-8") as input_file: - regexp = re.compile( - r"(?!.*\b(?:encoding|rb|w|wb|w+|wb+|ab|ab+)\b)(?<=\s)(open)\((.*)\)" - ) - input_text = input_file.read() - match = regexp.search(input_text) - - return match - - def _no_print_statements(self, filepath: str): - r"""Find all instances where a Python script file contains a `print` statement. - #[^\r\n]*print\(: Match print statement inside a comment. We ignore this group. - \"[^\r\n]*print\(: Match print statement inside a string. We ignore this group. - \"\"\".*?print\(.*?\"\"\"": Match print statement inside a triple-quoted string. Uses re.DOTALL to also match newlines with ".". - We ignore this group. - (print\()): Match print statement. - """ - - with open(filepath, encoding="utf-8") as input_file: - regexp = re.compile( - r"#[^\r\n]*print\(|\"[^\r\n]*print\(|\"\"\".*?print\(.*?\"\"\"|(print\()", - re.DOTALL, - ) - input_text = input_file.read() - # use `re.finditer` to handle the case where the ignored groups would be matched first by `re.search` - matches = regexp.finditer(input_text) - - filtered_matches = [ - match for match in matches if match is not None and match.group(1) is not None - ] - return filtered_matches[0] if filtered_matches else None - - def test_no_encoding_on_file_open(self): - dataset_files = list(DATASET_BUILDERS_ROOT.absolute().glob("**/*.py")) - - for dataset in dataset_files: - if self._no_encoding_on_file_open(str(dataset)): - raise AssertionError(f"open(...) must use utf-8 encoding in {dataset}") - - def test_no_print_statements(self): - dataset_files = list(DATASET_BUILDERS_ROOT.absolute().glob("**/*.py")) - - for dataset in dataset_files: - if self._no_print_statements(str(dataset)): - raise AssertionError( - f"print statement found in {dataset}. Use datasets.logger/logging instead."
- ) diff --git a/tests/data/test_document_conversion.py b/tests/data/test_document_conversion.py deleted file mode 100644 index 20dca28d..00000000 --- a/tests/data/test_document_conversion.py +++ /dev/null @@ -1,558 +0,0 @@ -import dataclasses - -import pytest -from transformers import AutoTokenizer, PreTrainedTokenizer - -from pytorch_ie import ( - text_based_document_to_token_based, - token_based_document_to_text_based, - tokenize_document, -) -from pytorch_ie.annotations import BinaryRelation, LabeledSpan, Span -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TokenBasedDocument -from tests.conftest import TestDocument - - -@dataclasses.dataclass -class TokenizedTestDocument(TokenBasedDocument): - sentences: AnnotationList[Span] = annotation_field(target="tokens") - entities: AnnotationList[LabeledSpan] = annotation_field(target="tokens") - relations: AnnotationList[BinaryRelation] = annotation_field(target="entities") - - -@pytest.fixture(scope="module") -def tokenizer() -> PreTrainedTokenizer: - return AutoTokenizer.from_pretrained("bert-base-cased") - - -def test_text_based_document_to_token_based(documents, tokenizer): - assert len(documents) >= 3 - for i, doc in enumerate(documents[:3]): - tokenized_text = tokenizer(doc.text, return_offsets_mapping=True) - tokenized_doc = text_based_document_to_token_based( - doc, - tokens=tokenized_text.tokens(), - result_document_type=TokenizedTestDocument, - # to increase test coverage - token_offset_mapping=None if i == 1 else tokenized_text.offset_mapping, - # to increase test coverage - char_to_token=None if i == 0 else tokenized_text.char_to_token, - ) - assert tokenized_doc is not None - - # check (de-)serialization - tokenized_doc.copy() - - offset_mapping_lists = [list(offsets) for offsets in tokenized_text.offset_mapping] - if i == 0: - assert doc.id == "train_doc1" - assert tokenized_doc.metadata["text"] == doc.text == "A single sentence." - assert tokenized_doc.metadata["token_offset_mapping"] == offset_mapping_lists - assert tokenized_doc.metadata.get("char_to_token") is None - assert tokenized_doc.tokens == ("[CLS]", "A", "single", "sentence", ".", "[SEP]") - assert len(tokenized_doc.sentences) == len(doc.sentences) == 1 - assert str(doc.sentences[0]) == "A single sentence." - assert str(tokenized_doc.sentences[0]) == "('A', 'single', 'sentence', '.')" - assert len(tokenized_doc.entities) == len(doc.entities) == 0 - assert len(tokenized_doc.relations) == len(doc.relations) == 0 - elif i == 1: - assert doc.id == "train_doc2" - assert tokenized_doc.metadata["text"] == doc.text == "Entity A works at B." - assert tokenized_doc.metadata.get("token_offset_mapping") is None - assert tokenized_doc.metadata["char_to_token"] == tokenized_text.char_to_token - assert tokenized_doc.tokens == ( - "[CLS]", - "En", - "##ti", - "##ty", - "A", - "works", - "at", - "B", - ".", - "[SEP]", - ) - assert len(tokenized_doc.sentences) == len(doc.sentences) == 1 - assert str(doc.sentences[0]) == "Entity A works at B." 
- assert ( - str(tokenized_doc.sentences[0]) - == "('En', '##ti', '##ty', 'A', 'works', 'at', 'B', '.')" - ) - assert len(tokenized_doc.entities) == len(doc.entities) == 2 - assert str(doc.entities[0]) == "Entity A" - assert str(tokenized_doc.entities[0]) == "('En', '##ti', '##ty', 'A')" - assert str(doc.entities[1]) == "B" - assert str(tokenized_doc.entities[1]) == "('B',)" - assert len(tokenized_doc.relations) == len(doc.relations) == 1 - assert doc.relations[0].head == doc.entities[0] - assert tokenized_doc.relations[0].head == tokenized_doc.entities[0] - assert doc.relations[0].tail == doc.entities[1] - assert tokenized_doc.relations[0].tail == tokenized_doc.entities[1] - elif i == 2: - assert doc.id == "train_doc3" - assert tokenized_doc.metadata["text"] == doc.text == "Entity C and D." - assert tokenized_doc.metadata["token_offset_mapping"] == offset_mapping_lists - assert tokenized_doc.metadata["char_to_token"] == tokenized_text.char_to_token - assert tokenized_doc.tokens == ( - "[CLS]", - "En", - "##ti", - "##ty", - "C", - "and", - "D", - ".", - "[SEP]", - ) - assert len(tokenized_doc.sentences) == len(doc.sentences) == 1 - assert str(doc.sentences[0]) == "Entity C and D." - assert ( - str(tokenized_doc.sentences[0]) == "('En', '##ti', '##ty', 'C', 'and', 'D', '.')" - ) - assert len(tokenized_doc.entities) == len(doc.entities) == 2 - assert str(doc.entities[0]) == "Entity C" - assert str(tokenized_doc.entities[0]) == "('En', '##ti', '##ty', 'C')" - assert str(doc.entities[1]) == "D" - assert str(tokenized_doc.entities[1]) == "('D',)" - assert len(tokenized_doc.relations) == len(doc.relations) == 0 - else: - raise ValueError(f"Unexpected document: {doc.id}") - - -def test_text_based_document_to_token_based_missing_args(documents, tokenizer): - with pytest.raises(ValueError) as excinfo: - doc = documents[0] - tokenized_text = tokenizer(doc.text) - tokenized_doc = text_based_document_to_token_based( - doc, - tokens=tokenized_text.tokens(), - result_document_type=TokenizedTestDocument, - ) - assert ( - str(excinfo.value) - == "either token_offset_mapping or char_to_token must be provided to convert a text based document " - "to token based, but both are None" - ) - - -def test_text_based_document_to_token_based_unaligned_span_strict(documents, tokenizer): - doc = documents[0].copy() - # add a span that is not aligned with the tokenization - doc.entities.append(LabeledSpan(start=0, end=2, label="unaligned")) - assert str(doc.entities[-1]) == "A " - tokenized_text = tokenizer(doc.text, return_offsets_mapping=True) - with pytest.raises(ValueError) as excinfo: - tokenized_doc = text_based_document_to_token_based( - doc, - tokens=tokenized_text.tokens(), - result_document_type=TokenizedTestDocument, - # to increase test coverage - token_offset_mapping=tokenized_text.offset_mapping, - # to increase test coverage - char_to_token=tokenized_text.char_to_token, - ) - assert ( - str(excinfo.value) - == 'cannot find token span for character span: "A ", text="A single sentence.", ' - "token_offset_mapping=[(0, 0), (0, 1), (2, 8), (9, 17), (17, 18), (0, 0)]" - ) - - -def test_text_based_document_to_token_based_unaligned_span_not_strict(documents, tokenizer): - doc = documents[0].copy() - doc.entities.append(LabeledSpan(start=0, end=2, label="unaligned")) - assert str(doc.entities[-1]) == "A " - tokenized_text = tokenizer(doc.text, return_offsets_mapping=True) - tokenized_doc = text_based_document_to_token_based( - doc, - tokens=tokenized_text.tokens(), - result_document_type=TokenizedTestDocument, - 
# to increase test coverage - token_offset_mapping=tokenized_text.offset_mapping, - # to increase test coverage - char_to_token=tokenized_text.char_to_token, - strict_span_conversion=False, - ) - - # check (de-)serialization - tokenized_doc.copy() - - assert len(doc.entities) == 1 - # the unaligned span is not included in the tokenized document - assert len(tokenized_doc.entities) == 0 - - -@pytest.fixture -def token_documents(documents, tokenizer): - result = [] - for doc in documents: - tokenized_text = tokenizer(doc.text, return_offsets_mapping=True) - tokenized_doc = text_based_document_to_token_based( - doc, - tokens=tokenized_text.tokens(), - result_document_type=TokenizedTestDocument, - char_to_token=tokenized_text.char_to_token, - token_offset_mapping=tokenized_text.offset_mapping, - ) - result.append(tokenized_doc) - return result - - -def test_token_based_document_to_text_based(documents, token_documents): - for doc, tokenized_doc in zip(documents, token_documents): - reconstructed_doc = token_based_document_to_text_based( - tokenized_doc, - result_document_type=TestDocument, - ) - assert reconstructed_doc is not None - doc_dict = doc.asdict() - reconstructed_doc_dict = reconstructed_doc.asdict() - # remove all added metadata (original text, token_offset_mapping, char_to_token, tokens) - reconstructed_doc_dict["metadata"] = { - k: reconstructed_doc_dict["metadata"][k] for k in doc_dict["metadata"] - } - assert reconstructed_doc_dict == doc_dict - - -def test_token_based_document_to_text_based_with_join_tokens_with(documents): - for doc in documents: - # split the text by individual whitespace characters - # so that we can reconstruct the original text via " ".join(tokens) - tokens = [] - token_offset_mapping = [] - start = 0 - for token in doc.text.split(" "): - tokens.append(token) - end = start + len(token) - token_offset_mapping.append((start, end)) - start = end + 1 - - tokenized_doc = text_based_document_to_token_based( - doc, - tokens=tokens, - result_document_type=TokenizedTestDocument, - token_offset_mapping=token_offset_mapping, - ) - reconstructed_doc = token_based_document_to_text_based( - tokenized_doc, - result_document_type=TestDocument, - join_tokens_with=" ", - ) - assert reconstructed_doc is not None - assert reconstructed_doc.text == doc.text - - if doc.id in ["train_doc1", "train_doc7"]: - doc_dict = doc.asdict() - reconstructed_doc_dict = reconstructed_doc.asdict() - # remove all added metadata (original text, token_offset_mapping, char_to_token, tokens) - reconstructed_doc_dict["metadata"] = { - k: reconstructed_doc_dict["metadata"][k] for k in doc_dict["metadata"] - } - assert reconstructed_doc_dict == doc_dict - elif doc.id == "train_doc2": - assert reconstructed_doc.sentences == doc.sentences - assert len(reconstructed_doc.entities) == len(doc.entities) == 2 - assert str(reconstructed_doc.entities[0]) == str(doc.entities[0]) == "Entity A" - assert str(doc.entities[1]) == "B" - assert str(reconstructed_doc.entities[1]) == "B." 
- assert len(reconstructed_doc.relations) == len(doc.relations) == 1 - assert ( - reconstructed_doc.relations[0].label == doc.relations[0].label == "per:employee_of" - ) - assert doc.relations[0].head == doc.entities[0] - assert reconstructed_doc.relations[0].head == reconstructed_doc.entities[0] - assert doc.relations[0].tail == doc.entities[1] - assert reconstructed_doc.relations[0].tail == reconstructed_doc.entities[1] - elif doc.id == "train_doc3": - assert reconstructed_doc.sentences == doc.sentences - assert len(reconstructed_doc.entities) == len(doc.entities) == 2 - assert str(reconstructed_doc.entities[0]) == str(doc.entities[0]) == "Entity C" - assert str(doc.entities[1]) == "D" - assert str(reconstructed_doc.entities[1]) == "D." - assert len(reconstructed_doc.relations) == len(doc.relations) == 0 - elif doc.id == "train_doc4": - assert reconstructed_doc.sentences == doc.sentences - assert len(reconstructed_doc.entities) == len(doc.entities) == 2 - assert str(reconstructed_doc.entities[0]) == str(doc.entities[0]) == "Entity E" - assert str(doc.entities[1]) == "F" - assert str(reconstructed_doc.entities[1]) == "F." - assert len(reconstructed_doc.relations) == len(doc.relations) == 0 - elif doc.id == "train_doc5": - assert reconstructed_doc.sentences == doc.sentences - assert len(reconstructed_doc.entities) == len(doc.entities) == 3 - assert str(reconstructed_doc.entities[0]) == str(doc.entities[0]) == "Entity G" - assert str(doc.entities[1]) == "H" - assert str(reconstructed_doc.entities[1]) == "H." - assert str(doc.entities[2]) == "I" - assert str(reconstructed_doc.entities[2]) == "I." - assert len(reconstructed_doc.relations) == len(doc.relations) == 3 - assert ( - reconstructed_doc.relations[0].label == doc.relations[0].label == "per:employee_of" - ) - assert doc.relations[0].head == doc.entities[0] - assert reconstructed_doc.relations[0].head == reconstructed_doc.entities[0] - assert doc.relations[0].tail == doc.entities[1] - assert reconstructed_doc.relations[0].tail == reconstructed_doc.entities[1] - assert reconstructed_doc.relations[1].label == doc.relations[1].label == "per:founder" - assert doc.relations[1].head == doc.entities[0] - assert reconstructed_doc.relations[1].head == reconstructed_doc.entities[0] - assert doc.relations[1].tail == doc.entities[2] - assert reconstructed_doc.relations[1].tail == reconstructed_doc.entities[2] - assert ( - reconstructed_doc.relations[2].label == doc.relations[2].label == "org:founded_by" - ) - assert doc.relations[2].head == doc.entities[2] - assert reconstructed_doc.relations[2].head == reconstructed_doc.entities[2] - assert doc.relations[2].tail == doc.entities[1] - assert reconstructed_doc.relations[2].tail == reconstructed_doc.entities[1] - elif doc.id == "train_doc6": - assert reconstructed_doc.sentences == doc.sentences - assert len(reconstructed_doc.entities) == len(doc.entities) == 3 - assert str(doc.entities[0]) == "Entity J" - assert str(reconstructed_doc.entities[0]) == "Entity J," - assert str(doc.entities[1]) == "K" - assert str(reconstructed_doc.entities[1]) == "K," - assert str(doc.entities[2]) == "L" - assert str(reconstructed_doc.entities[2]) == "L." - assert len(reconstructed_doc.relations) == len(doc.relations) == 0 - elif doc.id == "train_doc8": - assert len(reconstructed_doc.sentences) == len(doc.sentences) == 3 - assert ( - str(reconstructed_doc.sentences[0]) == str(doc.sentences[0]) == "First sentence." - ) - assert ( - str(reconstructed_doc.sentences[1]) - == str(doc.sentences[1]) - == "Entity M works at N." 
- ) - assert str(doc.sentences[2]) == "And it founded O" - assert str(reconstructed_doc.sentences[2]) == "And it founded O." - assert len(reconstructed_doc.entities) == len(doc.entities) == 4 - assert str(reconstructed_doc.entities[0]) == str(doc.entities[0]) == "Entity M" - assert str(doc.entities[1]) == "N" - assert str(reconstructed_doc.entities[1]) == "N." - assert str(reconstructed_doc.entities[2]) == str(doc.entities[2]) == "it" - assert str(doc.entities[3]) == "O" - assert str(reconstructed_doc.entities[3]) == "O." - assert len(reconstructed_doc.relations) == len(doc.relations) == 3 - assert ( - reconstructed_doc.relations[0].label == doc.relations[0].label == "per:employee_of" - ) - assert doc.relations[0].head == doc.entities[0] - assert reconstructed_doc.relations[0].head == reconstructed_doc.entities[0] - assert doc.relations[0].tail == doc.entities[1] - assert reconstructed_doc.relations[0].tail == reconstructed_doc.entities[1] - assert reconstructed_doc.relations[1].label == doc.relations[1].label == "per:founder" - assert doc.relations[1].head == doc.entities[2] - assert reconstructed_doc.relations[1].head == reconstructed_doc.entities[2] - assert doc.relations[1].tail == doc.entities[3] - assert reconstructed_doc.relations[1].tail == reconstructed_doc.entities[3] - assert ( - reconstructed_doc.relations[2].label == doc.relations[2].label == "org:founded_by" - ) - assert doc.relations[2].head == doc.entities[3] - assert reconstructed_doc.relations[2].head == reconstructed_doc.entities[3] - assert doc.relations[2].tail == doc.entities[2] - assert reconstructed_doc.relations[2].tail == reconstructed_doc.entities[2] - else: - raise ValueError(f"Unexpected document: {doc.id}") - - -def test_tokenize_document(documents, tokenizer): - doc = documents[1] - tokenized_docs = tokenize_document( - doc, - tokenizer=tokenizer, - result_document_type=TokenizedTestDocument, - ) - assert len(tokenized_docs) == 1 - tokenized_doc = tokenized_docs[0] - - # check (de-)serialization - tokenized_doc.copy() - - assert doc.id == "train_doc2" - assert tokenized_doc.metadata["text"] == doc.text == "Entity A works at B." - assert tokenized_doc.tokens == ( - "[CLS]", - "En", - "##ti", - "##ty", - "A", - "works", - "at", - "B", - ".", - "[SEP]", - ) - assert len(tokenized_doc.sentences) == len(doc.sentences) == 1 - assert str(doc.sentences[0]) == "Entity A works at B." - assert ( - str(tokenized_doc.sentences[0]) == "('En', '##ti', '##ty', 'A', 'works', 'at', 'B', '.')" - ) - assert len(tokenized_doc.entities) == len(doc.entities) == 2 - assert str(doc.entities[0]) == "Entity A" - assert str(tokenized_doc.entities[0]) == "('En', '##ti', '##ty', 'A')" - assert str(doc.entities[1]) == "B" - assert str(tokenized_doc.entities[1]) == "('B',)" - assert len(tokenized_doc.relations) == len(doc.relations) == 1 - assert tokenized_doc.relations[0].label == doc.relations[0].label == "per:employee_of" - assert doc.relations[0].head == doc.entities[0] - assert tokenized_doc.relations[0].head == tokenized_doc.entities[0] - assert doc.relations[0].tail == doc.entities[1] - assert tokenized_doc.relations[0].tail == tokenized_doc.entities[1] - - -def test_tokenize_document_max_length(documents, tokenizer): - doc = documents[1] - assert doc.id == "train_doc2" - assert doc.text == "Entity A works at B." - assert len(doc.sentences) == 1 - assert str(doc.sentences[0]) == "Entity A works at B." 
- assert len(doc.entities) == 2 - assert str(doc.entities[0]) == "Entity A" - assert str(doc.entities[1]) == "B" - assert len(doc.relations) == 1 - assert doc.relations[0].label == "per:employee_of" - assert doc.relations[0].head == doc.entities[0] - assert doc.relations[0].tail == doc.entities[1] - - tokenized_docs = tokenize_document( - doc, - tokenizer=tokenizer, - result_document_type=TokenizedTestDocument, - strict_span_conversion=False, - # This will cut out the second entity. Also, the sentence annotation will be removed, - # because the sentence is not complete anymore. - max_length=8, - return_overflowing_tokens=True, - ) - assert len(tokenized_docs) == 2 - tokenized_doc = tokenized_docs[0] - - # check (de-)serialization - tokenized_doc.copy() - - assert tokenized_doc.id == doc.id == "train_doc2" - assert tokenized_doc.metadata["text"] == doc.text == "Entity A works at B." - assert tokenized_doc.tokens == ("[CLS]", "En", "##ti", "##ty", "A", "works", "at", "[SEP]") - assert len(tokenized_doc.sentences) == 0 - assert len(tokenized_doc.entities) == 1 - assert str(tokenized_doc.entities[0]) == "('En', '##ti', '##ty', 'A')" - assert len(tokenized_doc.relations) == 0 - - tokenized_doc = tokenized_docs[1] - - # check (de-)serialization - tokenized_doc.copy() - - assert tokenized_doc.id == doc.id == "train_doc2" - assert tokenized_doc.metadata["text"] == doc.text == "Entity A works at B." - assert tokenized_doc.tokens == ("[CLS]", "B", ".", "[SEP]") - assert len(tokenized_doc.sentences) == 0 - assert len(tokenized_doc.entities) == 1 - assert str(tokenized_doc.entities[0]) == "('B',)" - assert len(tokenized_doc.relations) == 0 - - -def test_tokenize_document_partition(documents, tokenizer): - doc = documents[7] - assert doc.id == "train_doc8" - assert doc.text == "First sentence. Entity M works at N. And it founded O." - assert len(doc.sentences) == 3 - assert str(doc.sentences[0]) == "First sentence." - assert str(doc.sentences[1]) == "Entity M works at N." - assert str(doc.sentences[2]) == "And it founded O" - assert len(doc.entities) == 4 - assert str(doc.entities[0]) == "Entity M" - assert str(doc.entities[1]) == "N" - assert str(doc.entities[2]) == "it" - assert str(doc.entities[3]) == "O" - assert len(doc.relations) == 3 - assert doc.relations[0].head == doc.entities[0] - assert doc.relations[0].tail == doc.entities[1] - assert doc.relations[1].head == doc.entities[2] - assert doc.relations[1].tail == doc.entities[3] - assert doc.relations[2].head == doc.entities[3] - assert doc.relations[2].tail == doc.entities[2] - - tokenized_docs = tokenize_document( - doc, - tokenizer=tokenizer, - result_document_type=TokenizedTestDocument, - strict_span_conversion=False, - partition_layer="sentences", - ) - assert len(tokenized_docs) == 3 - tokenized_doc = tokenized_docs[0] - - # check (de-)serialization - tokenized_doc.copy() - - assert tokenized_doc.id == doc.id == "train_doc8" - assert ( - tokenized_doc.metadata["text"] - == doc.text - == "First sentence. Entity M works at N. And it founded O." - ) - assert tokenized_doc.tokens == ("[CLS]", "First", "sentence", ".", "[SEP]") - assert len(tokenized_doc.sentences) == 1 - assert len(tokenized_doc.entities) == 0 - assert len(tokenized_doc.relations) == 0 - - tokenized_doc = tokenized_docs[1] - - # check (de-)serialization - tokenized_doc.copy() - - assert tokenized_doc.id == doc.id == "train_doc8" - assert ( - tokenized_doc.metadata["text"] - == doc.text - == "First sentence. Entity M works at N. And it founded O." 
- ) - assert tokenized_doc.tokens == ( - "[CLS]", - "En", - "##ti", - "##ty", - "M", - "works", - "at", - "N", - ".", - "[SEP]", - ) - assert len(tokenized_doc.sentences) == 1 - assert len(tokenized_doc.entities) == 2 - assert str(tokenized_doc.entities[0]) == "('En', '##ti', '##ty', 'M')" - assert str(tokenized_doc.entities[1]) == "('N',)" - assert len(tokenized_doc.relations) == 1 - assert tokenized_doc.relations[0].label == "per:employee_of" - assert tokenized_doc.relations[0].head == tokenized_doc.entities[0] - assert tokenized_doc.relations[0].tail == tokenized_doc.entities[1] - - tokenized_doc = tokenized_docs[2] - - # check (de-)serialization - tokenized_doc.copy() - - assert tokenized_doc.id == doc.id == "train_doc8" - assert ( - tokenized_doc.metadata["text"] - == doc.text - == "First sentence. Entity M works at N. And it founded O." - ) - assert tokenized_doc.tokens == ("[CLS]", "And", "it", "founded", "O", "[SEP]") - assert len(tokenized_doc.sentences) == 1 - assert len(tokenized_doc.entities) == 2 - assert str(tokenized_doc.entities[0]) == "('it',)" - assert str(tokenized_doc.entities[1]) == "('O',)" - assert len(tokenized_doc.relations) == 2 - assert tokenized_doc.relations[0].label == "per:founder" - assert tokenized_doc.relations[0].head == tokenized_doc.entities[0] - assert tokenized_doc.relations[0].tail == tokenized_doc.entities[1] - assert tokenized_doc.relations[1].label == "org:founded_by" - assert tokenized_doc.relations[1].head == tokenized_doc.entities[1] - assert tokenized_doc.relations[1].tail == tokenized_doc.entities[0] diff --git a/tests/fixtures/builder/datasets/base_multi_config/base_multi_config.py b/tests/fixtures/builder/datasets/base_multi_config/base_multi_config.py deleted file mode 100644 index 9a014d66..00000000 --- a/tests/fixtures/builder/datasets/base_multi_config/base_multi_config.py +++ /dev/null @@ -1,249 +0,0 @@ -# Copyright 2020 HuggingFace Datasets Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Lint as: python3 -"""Introduction to the CoNLL-2002 Shared Task: Language-Independent Named Entity Recognition""" - -import datasets - -logger = datasets.logging.get_logger(__name__) - - -_CITATION = """\ -@inproceedings{tjong-kim-sang-2002-introduction, - title = "Introduction to the {C}o{NLL}-2002 Shared Task: Language-Independent Named Entity Recognition", - author = "Tjong Kim Sang, Erik F.", - booktitle = "{COLING}-02: The 6th Conference on Natural Language Learning 2002 ({C}o{NLL}-2002)", - year = "2002", - url = "https://www.aclweb.org/anthology/W02-2024", -} -""" - -_DESCRIPTION = """\ -Named entities are phrases that contain the names of persons, organizations, locations, times and quantities. - -Example: -[PER Wolff] , currently a journalist in [LOC Argentina] , played with [PER Del Bosque] in the final years of the seventies in [ORG Real Madrid] . - -The shared task of CoNLL-2002 concerns language-independent named entity recognition. 
-We will concentrate on four types of named entities: persons, locations, organizations and names of miscellaneous entities that do not belong to the previous three groups. -The participants of the shared task will be offered training and test data for at least two languages. -They will use the data for developing a named-entity recognition system that includes a machine learning component. -Information sources other than the training data may be used in this shared task. -We are especially interested in methods that can use additional unannotated data for improving their performance (for example co-training). - -The train/validation/test sets are available in Spanish and Dutch. - -For more details see https://www.clips.uantwerpen.be/conll2002/ner/ and https://www.aclweb.org/anthology/W02-2024/ -""" - -_URL = "https://raw.githubusercontent.com/teropa/nlp/master/resources/corpora/conll2002/" -_ES_TRAINING_FILE = "esp.train" -_ES_DEV_FILE = "esp.testa" -_ES_TEST_FILE = "esp.testb" -_NL_TRAINING_FILE = "ned.train" -_NL_DEV_FILE = "ned.testa" -_NL_TEST_FILE = "ned.testb" - - -class Conll2002Config(datasets.BuilderConfig): - """BuilderConfig for Conll2002""" - - def __init__(self, **kwargs): - """BuilderConfig forConll2002. - - Args: - **kwargs: keyword arguments forwarded to super. - """ - super().__init__(**kwargs) - - -class Conll2002(datasets.GeneratorBasedBuilder): - """Conll2002 dataset.""" - - BUILDER_CONFIGS = [ - Conll2002Config( - name="es", version=datasets.Version("1.0.0"), description="Conll2002 Spanish dataset" - ), - Conll2002Config( - name="nl", version=datasets.Version("1.0.0"), description="Conll2002 Dutch dataset" - ), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "tokens": datasets.Sequence(datasets.Value("string")), - "pos_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=[ - "AO", - "AQ", - "CC", - "CS", - "DA", - "DE", - "DD", - "DI", - "DN", - "DP", - "DT", - "Faa", - "Fat", - "Fc", - "Fd", - "Fe", - "Fg", - "Fh", - "Fia", - "Fit", - "Fp", - "Fpa", - "Fpt", - "Fs", - "Ft", - "Fx", - "Fz", - "I", - "NC", - "NP", - "P0", - "PD", - "PI", - "PN", - "PP", - "PR", - "PT", - "PX", - "RG", - "RN", - "SP", - "VAI", - "VAM", - "VAN", - "VAP", - "VAS", - "VMG", - "VMI", - "VMM", - "VMN", - "VMP", - "VMS", - "VSG", - "VSI", - "VSM", - "VSN", - "VSP", - "VSS", - "Y", - "Z", - ] - ) - if self.config.name == "es" - else datasets.features.ClassLabel( - names=[ - "Adj", - "Adv", - "Art", - "Conj", - "Int", - "Misc", - "N", - "Num", - "Prep", - "Pron", - "Punc", - "V", - ] - ) - ), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=[ - "O", - "B-PER", - "I-PER", - "B-ORG", - "I-ORG", - "B-LOC", - "I-LOC", - "B-MISC", - "I-MISC", - ] - ) - ), - } - ), - supervised_keys=None, - homepage="https://www.aclweb.org/anthology/W02-2024/", - citation=_CITATION, - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - urls_to_download = { - "train": f"{_URL}{_ES_TRAINING_FILE if self.config.name == 'es' else _NL_TRAINING_FILE}", - "dev": f"{_URL}{_ES_DEV_FILE if self.config.name == 'es' else _NL_DEV_FILE}", - "test": f"{_URL}{_ES_TEST_FILE if self.config.name == 'es' else _NL_TEST_FILE}", - } - downloaded_files = dl_manager.download_and_extract(urls_to_download) - - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": downloaded_files["train"]} - ), - datasets.SplitGenerator( - 
name=datasets.Split.VALIDATION, gen_kwargs={"filepath": downloaded_files["dev"]} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": downloaded_files["test"]} - ), - ] - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - with open(filepath, encoding="utf-8") as f: - guid = 0 - tokens = [] - pos_tags = [] - ner_tags = [] - for line in f: - if line.startswith("-DOCSTART-") or line == "" or line == "\n": - if tokens: - yield guid, { - "id": str(guid), - "tokens": tokens, - "pos_tags": pos_tags, - "ner_tags": ner_tags, - } - guid += 1 - tokens = [] - pos_tags = [] - ner_tags = [] - else: - # conll2002 tokens are space separated - splits = line.split(" ") - tokens.append(splits[0]) - pos_tags.append(splits[1]) - ner_tags.append(splits[2].rstrip()) - # last example - yield guid, { - "id": str(guid), - "tokens": tokens, - "pos_tags": pos_tags, - "ner_tags": ner_tags, - } diff --git a/tests/fixtures/builder/datasets/base_single_config/base_single_config.py b/tests/fixtures/builder/datasets/base_single_config/base_single_config.py deleted file mode 100644 index 7f87b4ec..00000000 --- a/tests/fixtures/builder/datasets/base_single_config/base_single_config.py +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright 2020 HuggingFace Datasets Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Lint as: python3 -"""Introduction to the CoNLL-2003 Shared Task: Language-Independent Named Entity Recognition""" - -import os - -import datasets - -logger = datasets.logging.get_logger(__name__) - - -_CITATION = """\ -@inproceedings{tjong-kim-sang-de-meulder-2003-introduction, - title = "Introduction to the {C}o{NLL}-2003 Shared Task: Language-Independent Named Entity Recognition", - author = "Tjong Kim Sang, Erik F. and - De Meulder, Fien", - booktitle = "Proceedings of the Seventh Conference on Natural Language Learning at {HLT}-{NAACL} 2003", - year = "2003", - url = "https://www.aclweb.org/anthology/W03-0419", - pages = "142--147", -} -""" - -_DESCRIPTION = """\ -The shared task of CoNLL-2003 concerns language-independent named entity recognition. We will concentrate on -four types of named entities: persons, locations, organizations and names of miscellaneous entities that do -not belong to the previous three groups. - -The CoNLL-2003 shared task data files contain four columns separated by a single space. Each word has been put on -a separate line and there is an empty line after each sentence. The first item on each line is a word, the second -a part-of-speech (POS) tag, the third a syntactic chunk tag and the fourth the named entity tag. The chunk tags -and the named entity tags have the format I-TYPE which means that the word is inside a phrase of type TYPE. Only -if two phrases of the same type immediately follow each other, the first word of the second phrase will have tag -B-TYPE to show that it starts a new phrase. A word with tag O is not part of a phrase. 
Note the dataset uses IOB2 -tagging scheme, whereas the original dataset uses IOB1. - -For more details see https://www.clips.uantwerpen.be/conll2003/ner/ and https://www.aclweb.org/anthology/W03-0419 -""" - -_URL = "https://data.deepai.org/conll2003.zip" -_TRAINING_FILE = "train.txt" -_DEV_FILE = "valid.txt" -_TEST_FILE = "test.txt" - - -class Conll2003Config(datasets.BuilderConfig): - """BuilderConfig for Conll2003""" - - def __init__(self, **kwargs): - """BuilderConfig forConll2003. - - Args: - **kwargs: keyword arguments forwarded to super. - """ - super().__init__(**kwargs) - - -class Conll2003(datasets.GeneratorBasedBuilder): - """Conll2003 dataset.""" - - BUILDER_CONFIGS = [ - Conll2003Config( - name="conll2003", version=datasets.Version("1.0.0"), description="Conll2003 dataset" - ), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "tokens": datasets.Sequence(datasets.Value("string")), - "pos_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=[ - '"', - "''", - "#", - "$", - "(", - ")", - ",", - ".", - ":", - "``", - "CC", - "CD", - "DT", - "EX", - "FW", - "IN", - "JJ", - "JJR", - "JJS", - "LS", - "MD", - "NN", - "NNP", - "NNPS", - "NNS", - "NN|SYM", - "PDT", - "POS", - "PRP", - "PRP$", - "RB", - "RBR", - "RBS", - "RP", - "SYM", - "TO", - "UH", - "VB", - "VBD", - "VBG", - "VBN", - "VBP", - "VBZ", - "WDT", - "WP", - "WP$", - "WRB", - ] - ) - ), - "chunk_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=[ - "O", - "B-ADJP", - "I-ADJP", - "B-ADVP", - "I-ADVP", - "B-CONJP", - "I-CONJP", - "B-INTJ", - "I-INTJ", - "B-LST", - "I-LST", - "B-NP", - "I-NP", - "B-PP", - "I-PP", - "B-PRT", - "I-PRT", - "B-SBAR", - "I-SBAR", - "B-UCP", - "I-UCP", - "B-VP", - "I-VP", - ] - ) - ), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=[ - "O", - "B-PER", - "I-PER", - "B-ORG", - "I-ORG", - "B-LOC", - "I-LOC", - "B-MISC", - "I-MISC", - ] - ) - ), - } - ), - supervised_keys=None, - homepage="https://www.aclweb.org/anthology/W03-0419/", - citation=_CITATION, - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - downloaded_file = dl_manager.download_and_extract(_URL) - data_files = { - "train": os.path.join(downloaded_file, _TRAINING_FILE), - "dev": os.path.join(downloaded_file, _DEV_FILE), - "test": os.path.join(downloaded_file, _TEST_FILE), - } - - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": data_files["train"]} - ), - datasets.SplitGenerator( - name=datasets.Split.VALIDATION, gen_kwargs={"filepath": data_files["dev"]} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": data_files["test"]} - ), - ] - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - with open(filepath, encoding="utf-8") as f: - guid = 0 - tokens = [] - pos_tags = [] - chunk_tags = [] - ner_tags = [] - for line in f: - if line.startswith("-DOCSTART-") or line == "" or line == "\n": - if tokens: - yield guid, { - "id": str(guid), - "tokens": tokens, - "pos_tags": pos_tags, - "chunk_tags": chunk_tags, - "ner_tags": ner_tags, - } - guid += 1 - tokens = [] - pos_tags = [] - chunk_tags = [] - ner_tags = [] - else: - # conll2003 tokens are space separated - splits = line.split(" ") - tokens.append(splits[0]) - pos_tags.append(splits[1]) - chunk_tags.append(splits[2]) - ner_tags.append(splits[3].rstrip()) - # last example - if 
tokens: - yield guid, { - "id": str(guid), - "tokens": tokens, - "pos_tags": pos_tags, - "chunk_tags": chunk_tags, - "ner_tags": ner_tags, - } diff --git a/tests/fixtures/builder/datasets/default_config_kwargs/default_config_kwargs.py b/tests/fixtures/builder/datasets/default_config_kwargs/default_config_kwargs.py deleted file mode 100644 index ed209d84..00000000 --- a/tests/fixtures/builder/datasets/default_config_kwargs/default_config_kwargs.py +++ /dev/null @@ -1,57 +0,0 @@ -from dataclasses import dataclass - -import datasets - -import pytorch_ie.data.builder -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextDocument -from tests import FIXTURES_ROOT - - -class ExampleConfig(datasets.BuilderConfig): - """BuilderConfig for CoNLL2002""" - - def __init__(self, parameter: str, **kwargs): - """BuilderConfig for CoNLL2002. - Args: - **kwargs: keyword arguments forwarded to super. - """ - super().__init__(**kwargs) - self.parameter = parameter - - -@dataclass -class ExampleDocument(TextDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -class Example(pytorch_ie.data.builder.GeneratorBasedBuilder): - DOCUMENT_TYPE = ExampleDocument - - BASE_DATASET_PATH = str(FIXTURES_ROOT / "builder" / "datasets" / "base_multi_config") - - BASE_CONFIG_KWARGS_DICT = { - "nl": {"version": datasets.Version("0.0.0"), "description": "new description"}, - } - - BUILDER_CONFIGS = [ - ExampleConfig( - name="es", - version=datasets.Version("1.0.0"), - description="CoNLL2002 Spanish dataset", - parameter="test", - ), - ExampleConfig( - name="nl", - version=datasets.Version("1.0.0"), - description="CoNLL2002 Dutch dataset", - parameter="test", - ), - ] - - def _generate_document_kwargs(self, dataset): - pass - - def _generate_document(self, example, int_to_str): - pass diff --git a/tests/fixtures/builder/datasets/multi_config/multi_config.py b/tests/fixtures/builder/datasets/multi_config/multi_config.py deleted file mode 100644 index ced402fc..00000000 --- a/tests/fixtures/builder/datasets/multi_config/multi_config.py +++ /dev/null @@ -1,53 +0,0 @@ -from dataclasses import dataclass - -import datasets - -import pytorch_ie.data.builder -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextDocument -from tests import FIXTURES_ROOT - - -class ExampleConfig(datasets.BuilderConfig): - """BuilderConfig for CoNLL2002""" - - def __init__(self, parameter: str, **kwargs): - """BuilderConfig for CoNLL2002. - Args: - **kwargs: keyword arguments forwarded to super. 
- """ - super().__init__(**kwargs) - self.parameter = parameter - - -@dataclass -class ExampleDocument(TextDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -class Example(pytorch_ie.data.builder.GeneratorBasedBuilder): - DOCUMENT_TYPE = ExampleDocument - - BASE_DATASET_PATH = str(FIXTURES_ROOT / "builder" / "datasets" / "base_multi_config") - - BUILDER_CONFIGS = [ - ExampleConfig( - name="es", - version=datasets.Version("1.0.0"), - description="CoNLL2002 Spanish dataset", - parameter="test", - ), - ExampleConfig( - name="nl", - version=datasets.Version("1.0.0"), - description="CoNLL2002 Dutch dataset", - parameter="test", - ), - ] - - def _generate_document_kwargs(self, dataset): - pass - - def _generate_document(self, example, int_to_str): - pass diff --git a/tests/fixtures/builder/datasets/name_mapping/name_mapping.py b/tests/fixtures/builder/datasets/name_mapping/name_mapping.py deleted file mode 100644 index 70fd5e15..00000000 --- a/tests/fixtures/builder/datasets/name_mapping/name_mapping.py +++ /dev/null @@ -1,56 +0,0 @@ -from dataclasses import dataclass - -import datasets - -import pytorch_ie.data.builder -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextDocument -from tests import FIXTURES_ROOT - - -class ExampleConfig(datasets.BuilderConfig): - """BuilderConfig for CoNLL2002""" - - def __init__(self, parameter: str, **kwargs): - """BuilderConfig for CoNLL2002. - Args: - **kwargs: keyword arguments forwarded to super. - """ - super().__init__(**kwargs) - self.parameter = parameter - - -@dataclass -class ExampleDocument(TextDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -class Example(pytorch_ie.data.builder.GeneratorBasedBuilder): - DOCUMENT_TYPE = ExampleDocument - - BASE_DATASET_PATH = str(FIXTURES_ROOT / "builder" / "datasets" / "base_multi_config") - - # map everything to "nl" - BASE_CONFIG_KWARGS_DICT = {"es": {"name": "nl"}} - - BUILDER_CONFIGS = [ - ExampleConfig( - name="es", - version=datasets.Version("1.0.0"), - description="CoNLL2002 Spanish dataset", - parameter="test", - ), - ExampleConfig( - name="nl", - version=datasets.Version("1.0.0"), - description="CoNLL2002 Dutch dataset", - parameter="test", - ), - ] - - def _generate_document_kwargs(self, dataset): - pass - - def _generate_document(self, example, int_to_str): - pass diff --git a/tests/fixtures/builder/datasets/name_mapping_disabled/name_mapping_disabled.py b/tests/fixtures/builder/datasets/name_mapping_disabled/name_mapping_disabled.py deleted file mode 100644 index a713905a..00000000 --- a/tests/fixtures/builder/datasets/name_mapping_disabled/name_mapping_disabled.py +++ /dev/null @@ -1,56 +0,0 @@ -from dataclasses import dataclass - -import datasets - -import pytorch_ie.data.builder -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextDocument -from tests import FIXTURES_ROOT - - -class ExampleConfig(datasets.BuilderConfig): - """BuilderConfig for CoNLL2002""" - - def __init__(self, parameter: str, **kwargs): - """BuilderConfig for CoNLL2002. - Args: - **kwargs: keyword arguments forwarded to super. 
- """ - super().__init__(**kwargs) - self.parameter = parameter - - -@dataclass -class ExampleDocument(TextDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -class Example(pytorch_ie.data.builder.GeneratorBasedBuilder): - DOCUMENT_TYPE = ExampleDocument - - BASE_DATASET_PATH = str(FIXTURES_ROOT / "builder" / "datasets" / "base_multi_config") - - # disable any mapping - BASE_CONFIG_KWARGS_DICT = None - - BUILDER_CONFIGS = [ - ExampleConfig( - name="es", - version=datasets.Version("1.0.0"), - description="CoNLL2002 Spanish dataset", - parameter="test", - ), - ExampleConfig( - name="nl", - version=datasets.Version("1.0.0"), - description="CoNLL2002 Dutch dataset", - parameter="test", - ), - ] - - def _generate_document_kwargs(self, dataset): - pass - - def _generate_document(self, example, int_to_str): - pass diff --git a/tests/fixtures/builder/datasets/single_config/single_config.py b/tests/fixtures/builder/datasets/single_config/single_config.py deleted file mode 100644 index 9c4ee313..00000000 --- a/tests/fixtures/builder/datasets/single_config/single_config.py +++ /dev/null @@ -1,51 +0,0 @@ -from dataclasses import dataclass -from typing import Type - -import datasets - -import pytorch_ie.data.builder -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextDocument -from tests import FIXTURES_ROOT - - -class ExampleConfig(datasets.BuilderConfig): - """BuilderConfig for CoNLL2003""" - - def __init__(self, parameter: str, **kwargs): - """BuilderConfig for CoNLL2003. - Args: - **kwargs: keyword arguments forwarded to super. - """ - super().__init__(**kwargs) - self.parameter = parameter - - -@dataclass -class ExampleDocument(TextDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -class Example(pytorch_ie.data.builder.GeneratorBasedBuilder): - DOCUMENT_TYPE = ExampleDocument - - BASE_DATASET_PATH = str(FIXTURES_ROOT / "builder" / "datasets" / "base_single_config") - - BUILDER_CONFIGS = [ - ExampleConfig( - name="conll2003", - version=datasets.Version("1.0.0"), - description="Example dataset", - parameter="test", - ), - ] - - # required to create config from scratch via kwargs - BUILDER_CONFIG_CLASS: Type[datasets.BuilderConfig] = ExampleConfig - - def _generate_document_kwargs(self, dataset): - pass - - def _generate_document(self, example, int_to_str): - pass diff --git a/tests/fixtures/builder/datasets/wrong_builder_class_config/wrong_builder_class_config.py b/tests/fixtures/builder/datasets/wrong_builder_class_config/wrong_builder_class_config.py deleted file mode 100644 index 7686c9ed..00000000 --- a/tests/fixtures/builder/datasets/wrong_builder_class_config/wrong_builder_class_config.py +++ /dev/null @@ -1,51 +0,0 @@ -from dataclasses import dataclass -from typing import Type - -import datasets - -import pytorch_ie.data.builder -from pytorch_ie.annotations import LabeledSpan -from pytorch_ie.core import AnnotationList, annotation_field -from pytorch_ie.documents import TextDocument -from tests import FIXTURES_ROOT - - -class ExampleConfig(datasets.BuilderConfig): - """BuilderConfig for CoNLL2003""" - - def __init__(self, parameter: str, **kwargs): - """BuilderConfig for CoNLL2003. - Args: - **kwargs: keyword arguments forwarded to super. 
- """ - super().__init__(**kwargs) - self.parameter = parameter - - -@dataclass -class ExampleDocument(TextDocument): - entities: AnnotationList[LabeledSpan] = annotation_field(target="text") - - -class Example(pytorch_ie.data.builder.ArrowBasedBuilder): - DOCUMENT_TYPE = ExampleDocument - - BASE_DATASET_PATH = str(FIXTURES_ROOT / "builder" / "datasets" / "base_single_config") - - BUILDER_CONFIGS = [ - ExampleConfig( - name="conll2003", - version=datasets.Version("1.0.0"), - description="Example dataset", - parameter="test", - ), - ] - - # required to create config from scratch via kwargs - BUILDER_CONFIG_CLASS: Type[datasets.BuilderConfig] = ExampleConfig - - def _generate_document_kwargs(self, dataset): - pass - - def _generate_document(self, example, int_to_str): - pass diff --git a/tests/data/__init__.py b/tests/metrics/__init__.py similarity index 100% rename from tests/data/__init__.py rename to tests/metrics/__init__.py diff --git a/tests/test_metrics.py b/tests/metrics/test_f1.py similarity index 100% rename from tests/test_metrics.py rename to tests/metrics/test_f1.py diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 1411f713..b39102b0 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,12 +1,9 @@ -import re - import pytest import torch import transformers from transformers.modeling_outputs import BaseModelOutputWithPooling import pytorch_ie.models.modules.mlp -from pytorch_ie.core.taskmodule import InplaceNotSupportedException from pytorch_ie.models.transformer_span_classification import TransformerSpanClassificationModel from pytorch_ie.pipeline import Pipeline from pytorch_ie.taskmodules.transformer_span_classification import ( @@ -122,29 +119,3 @@ def test_pipeline_with_documents(documents, prepared_taskmodule, mock_model, inp assert not (id(returned_document) == id(document)) assert not document.entities.predictions assert returned_document.entities.predictions - - -@pytest.mark.slow -@pytest.mark.parametrize("inplace", [False, True]) -def test_pipeline_with_dataset(dataset, prepared_taskmodule, mock_model, inplace): - train_dataset = dataset["train"] - - pipeline = Pipeline(model=mock_model, taskmodule=prepared_taskmodule, device=-1) - - if inplace: - with pytest.raises( - InplaceNotSupportedException, - match=re.escape( - "Immutable sequences of Documents (such as Datasets) can't be modified in place. Please set inplace=False." - ), - ): - returned_documents = pipeline(train_dataset, inplace=inplace) - else: - returned_documents = pipeline(train_dataset, inplace=inplace) - - assert len(train_dataset) == len(returned_documents) - - for returned_document, document in zip(returned_documents, train_dataset): - assert not (id(returned_document) == id(document)) - assert not document.entities.predictions - assert returned_document.entities.predictions