Skip to content

Commit

Permalink
Merge branch 'main' into add-got-ocr2
Browse files Browse the repository at this point in the history
  • Loading branch information
yonigozlan authored Dec 16, 2024
2 parents 879fe3e + d5b81e1 commit 79e5734
Show file tree
Hide file tree
Showing 428 changed files with 14,058 additions and 3,916 deletions.
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ jobs:
name: "Prepare pipeline parameters"
command: |
python utils/process_test_artifacts.py
# To keep generated_config.yaml from becoming too long for the continuation orb, we pass the links to the artifacts as parameters.
# Otherwise the list of tests was just too big: being explicit is good, but here it became a limitation.
# We used:

# https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
# We could not pass a nested dict, which is why we create the test_file_... parameters for every single job

- store_artifacts:
path: test_preparation/transformed_artifacts.json
- store_artifacts:
Expand Down
37 changes: 31 additions & 6 deletions .circleci/create_circleci_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,30 @@
"RUN_PT_FLAX_CROSS_TESTS": False,
}
# Disable the use of {"s": None} as the output is way too long, which makes navigation on CircleCI impractical
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsf":None}
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsfE":None}
DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]


class EmptyJob:
    """Minimal CircleCI job description that runs no tests.

    With the default ``job_name`` (``"empty"``) the job only lists the working
    directory. When ``job_name`` is set to ``"collection_job"`` the job
    additionally checks out the repository, waits until no other job of the
    workflow is reported as running, runs
    ``utils/process_circleci_workflow_test_reports.py`` and stores the
    ``outputs`` directory as an artifact.
    """

    job_name = "empty"

    def to_dict(self):
        """Return the job definition as a dict with ``docker`` and ``steps`` keys."""
        steps = [{"run": 'ls -la'}]
        if self.job_name == "collection_job":
            # Every shell command below ends with `|| true`, so a failure in the
            # collection logic does not make the step itself fail.
            steps.extend(
                [
                    "checkout",
                    {"run": "pip install requests || true"},
                    # Poll the workflow's job list every 5 seconds until no job other
                    # than `collection_job` has the status "running".
                    {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
                    {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
                    {"store_artifacts": {"path": "outputs"}},
                    {"run": 'echo "All required jobs have now completed"'},
                ]
            )

        return {
            # Deep copy so the returned dict never shares the module-level image list.
            "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
            "steps": steps,
        }


Expand Down Expand Up @@ -133,7 +146,7 @@ def to_dict(self):
"command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
},
{"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
{"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>>' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
{"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
{"run": {"name": "Split tests across parallel nodes: show current parallel tests",
"command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
}
Expand Down Expand Up @@ -352,6 +365,7 @@ def job_name(self):
DOC_TESTS = [doc_test_job]
ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip


def create_circleci_config(folder=None):
if folder is None:
folder = os.getcwd()
Expand All @@ -361,7 +375,13 @@ def create_circleci_config(folder=None):

if len(jobs) == 0:
jobs = [EmptyJob()]
print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
else:
print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
# Add a job that waits for all the test jobs and aggregates their test summary files at the end
collection_job = EmptyJob()
collection_job.job_name = "collection_job"
jobs = [collection_job] + jobs

config = {
"version": "2.1",
"parameters": {
Expand All @@ -371,9 +391,14 @@ def create_circleci_config(folder=None):
**{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
**{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
},
"jobs" : {j.job_name: j.to_dict() for j in jobs},
"workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
"jobs": {j.job_name: j.to_dict() for j in jobs}
}
if "CIRCLE_TOKEN" in os.environ:
# For private forked repo. (e.g. new model addition)
config["workflows"] = {"version": 2, "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]}}
else:
# For public repo. (e.g. `transformers`)
config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
with open(os.path.join(folder, "generated_config.yml"), "w") as f:
f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>"))

Expand Down
50 changes: 25 additions & 25 deletions .github/workflows/self-push-amd-mi210-caller.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
name: Self-hosted runner (AMD mi210 CI caller)

on:
workflow_run:
workflows: ["Self-hosted runner (push-caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"

jobs:
run_amd_ci:
name: AMD mi210
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi210
secrets: inherit
name: Self-hosted runner (AMD mi210 CI caller)

on:
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"

jobs:
run_amd_ci:
name: AMD mi210
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi210
secrets: inherit
50 changes: 25 additions & 25 deletions .github/workflows/self-push-amd-mi250-caller.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
name: Self-hosted runner (AMD mi250 CI caller)

on:
workflow_run:
workflows: ["Self-hosted runner (push-caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"

jobs:
run_amd_ci:
name: AMD mi250
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi250
secrets: inherit
name: Self-hosted runner (AMD mi250 CI caller)

on:
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"

jobs:
run_amd_ci:
name: AMD mi250
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi250
secrets: inherit
8 changes: 4 additions & 4 deletions .github/workflows/self-push-amd-mi300-caller.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: Self-hosted runner (AMD mi300 CI caller)

on:
workflow_run:
workflows: ["Self-hosted runner (push-caller)"]
branches: ["main"]
types: [completed]
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
push:
branches:
- run_amd_push_ci_caller*
Expand Down
8 changes: 4 additions & 4 deletions docker/transformers-pytorch-amd-gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rocm/dev-ubuntu-22.04:6.0.2
FROM rocm/dev-ubuntu-22.04:6.1
# rocm/pytorch has no version with 2.1.0
LABEL maintainer="Hugging Face"

Expand All @@ -11,7 +11,7 @@ RUN apt update && \

RUN python3 -m pip install --no-cache-dir --upgrade pip numpy

RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.1

RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"

Expand All @@ -30,5 +30,5 @@ RUN python3 -m pip uninstall -y tensorflow flax
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop

# Remove nvml as it is not compatible with ROCm. apex is not tested on NVIDIA either.
RUN python3 -m pip uninstall py3nvml pynvml apex -y
# Remove nvml and nvidia-ml-py as they are not compatible with ROCm. apex is not tested on NVIDIA either.
RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
10 changes: 8 additions & 2 deletions docs/source/ar/_toctree.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,18 @@
title: المعايير
- local: notebooks
title: دفاتر الملاحظات مع الأمثلة
# - local: community
# title: موارد المجتمع
- local: community
title: موارد المجتمع
- local: troubleshooting
title: استكشاف الأخطاء وإصلاحها
- local: gguf
title: التوافق مع ملفات GGUF
- local: tiktoken
title: التوافق مع ملفات TikToken
- local: modular_transformers
title: الوحدات النمطية في `transformers`
- local: how_to_hack_models
title: اختراق النموذج (الكتابة فوق فئة لاستخدامك)
title: أدلة المطورين
# - sections:
# - local: quantization/overview
Expand Down
Loading

0 comments on commit 79e5734

Please sign in to comment.