Merge branch 'main' into init_round_4
ArthurZucker authored Dec 20, 2024
2 parents 9f13265 + 40292aa commit 75496a8
Showing 230 changed files with 14,905 additions and 10,557 deletions.
23 changes: 9 additions & 14 deletions .circleci/create_circleci_config.py
@@ -55,6 +55,7 @@ def to_dict(self):

return {
"docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
"resource_class": "small",
"steps": steps,
}

@@ -67,9 +68,9 @@ class CircleCIJob:
install_steps: List[str] = None
marker: Optional[str] = None
parallelism: Optional[int] = 0
-    pytest_num_workers: int = 12
+    pytest_num_workers: int = 8
pytest_options: Dict[str, Any] = None
-    resource_class: Optional[str] = "2xlarge"
+    resource_class: Optional[str] = "xlarge"
tests_to_run: Optional[List[str]] = None
num_test_files_per_worker: Optional[int] = 10
# This should be only used for doctest job!
@@ -198,44 +199,40 @@ def job_name(self):
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="not generate",
parallelism=6,
-    pytest_num_workers=8
)

generate_job = CircleCIJob(
"generate",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="generate",
parallelism=6,
-    pytest_num_workers=8
)

tokenization_job = CircleCIJob(
"tokenization",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8,
-    pytest_num_workers=16
)

processor_job = CircleCIJob(
"processors",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8,
-    pytest_num_workers=6
)

tf_job = CircleCIJob(
"tf",
docker_image=[{"image":"huggingface/transformers-tf-light"}],
parallelism=6,
-    pytest_num_workers=16,
)


flax_job = CircleCIJob(
"flax",
docker_image=[{"image":"huggingface/transformers-jax-light"}],
parallelism=6,
-    pytest_num_workers=16
+    pytest_num_workers=16,
+    resource_class="2xlarge",
)


@@ -244,7 +241,7 @@ def job_name(self):
additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-torch-light"}],
marker="is_pipeline_test",
-    parallelism=4
+    parallelism=4,
)


@@ -253,7 +250,7 @@ def job_name(self):
additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-tf-light"}],
marker="is_pipeline_test",
-    parallelism=4
+    parallelism=4,
)


@@ -270,15 +267,13 @@ def job_name(self):
docker_image=[{"image":"huggingface/transformers-examples-torch"}],
# TODO @ArthurZucker remove this once docker is easier to build
install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
-    pytest_num_workers=8,
)


examples_tensorflow_job = CircleCIJob(
"examples_tensorflow",
additional_env={"OMP_NUM_THREADS": 8},
docker_image=[{"image":"huggingface/transformers-examples-tf"}],
-    pytest_num_workers=16,
)


@@ -293,6 +288,7 @@ def job_name(self):
],
marker="is_staging_test",
pytest_num_workers=2,
resource_class="medium",
)


@@ -305,13 +301,13 @@ def job_name(self):
],
pytest_options={"k onnx": None},
pytest_num_workers=1,
resource_class="small",
)


exotic_models_job = CircleCIJob(
"exotic_models",
docker_image=[{"image":"huggingface/transformers-exotic-models"}],
-    pytest_num_workers=12,
parallelism=4,
pytest_options={"durations": 100},
)
@@ -330,7 +326,6 @@ def job_name(self):
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="not generate",
parallelism=6,
-    pytest_num_workers=8,
)


2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
@@ -63,7 +63,7 @@ jobs:
commit_id=$GITHUB_SHA
fi
commit_msg=$(git show -s --format=%s | cut -c1-70)
-          python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
+          python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
# Enable this to see debug logs
2 changes: 1 addition & 1 deletion .github/workflows/self-comment-ci.yml
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-22.04
name: Get PR number
# For security: only allow team members to run
-    if: contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez"]'), github.actor)
+    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
outputs:
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
steps:
33 changes: 0 additions & 33 deletions .github/workflows/self-nightly-past-ci-caller.yml
@@ -21,39 +21,6 @@ jobs:
echo "$(python3 -c 'print(int(${{ github.run_number }}) % 10)')"
echo "run_number=$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" >> $GITHUB_OUTPUT
-  run_past_ci_pytorch_1-13:
-    name: PyTorch 1.13
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 0 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: pytorch
-      version: "1.13"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
-  run_past_ci_pytorch_1-12:
-    name: PyTorch 1.12
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 1 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: pytorch
-      version: "1.12"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
-  run_past_ci_pytorch_1-11:
-    name: PyTorch 1.11
-    needs: get_number
-    if: needs.get_number.outputs.run_number == 2 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
-    uses: ./.github/workflows/self-past-caller.yml
-    with:
-      framework: pytorch
-      version: "1.11"
-      sha: ${{ github.sha }}
-    secrets: inherit
-
run_past_ci_tensorflow_2-11:
name: TensorFlow 2.11
needs: get_number
2 changes: 1 addition & 1 deletion README.md
@@ -249,7 +249,7 @@ The model itself is a regular [Pytorch `nn.Module`](https://pytorch.org/docs/sta

### With pip

-This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 1.11+, and TensorFlow 2.6+.
+This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+, and TensorFlow 2.6+.

You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).

49 changes: 49 additions & 0 deletions benchmark/README.md
@@ -0,0 +1,49 @@
# Benchmarks

You might want to add new benchmarks.

To do so, define a Python function named `run_benchmark` in a Python file located in this `benchmark/` directory.

The expected function signature is the following:

```py
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
```
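
For example, a minimal benchmark file (hypothetical name `my_benchmark.py`, with the actual measurements elided) could look like this:

```py
from logging import Logger


def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
    # branch, commit_id and commit_msg are supplied by benchmarks_entrypoint.py
    logger.info(f"running my benchmark for {branch} ({commit_id}): {commit_msg}")
    # ... load a model and record whatever you want to measure here ...
```

Every `.py` file in this directory (other than `benchmarks_entrypoint.py` itself) is discovered and executed automatically by the entrypoint.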

## Writing metrics to the database

`MetricsRecorder` is thread-safe, in the sense of the Python [`threading.Thread`](https://docs.python.org/3/library/threading.html#threading.Thread) API: you can start a background thread to record the device measurements while the main thread runs the model measurements, without blocking it.

See [`llama.py`](./llama.py) for an example of this in practice.

```py
from logging import Logger

from benchmarks_entrypoint import MetricsRecorder
import psycopg2

def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
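    # gpu_name, model_id and the timing values below are placeholders that your benchmark measures itself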
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
# To collect device measurements
metrics_recorder.collect_device_measurements(
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
)
# To collect your model measurements
metrics_recorder.collect_model_measurements(
benchmark_id,
{
"model_load_time": model_load_time,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
"first_eager_generate_time_secs": first_eager_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time,
"time_to_first_token_secs": time_to_first_token,
"time_to_second_token_secs": time_to_second_token,
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
)
```
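
Because the recorder is thread-safe, the device readings can run on a background thread while the main thread executes the model. A minimal sketch, reusing `metrics_recorder` and `benchmark_id` from the snippet above and assuming `psutil` is available for the CPU/memory readings:

```py
import threading
import time

import psutil  # assumed dependency, used here only for CPU/memory readings


def poll_device(benchmark_id, stop_event, metrics_recorder):
    # Record device utilization until the main thread signals completion.
    while not stop_event.is_set():
        cpu_util = psutil.cpu_percent()
        mem_megabytes = psutil.virtual_memory().used / (1024 * 1024)
        gpu_util, gpu_mem_megabytes = 0, 0  # placeholders; see llama.py for real GPU readings
        metrics_recorder.collect_device_measurements(
            benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
        )
        time.sleep(0.01)  # avoid flooding the database


stop_event = threading.Event()
thread = threading.Thread(target=poll_device, args=(benchmark_id, stop_event, metrics_recorder))
thread.start()
# ... run the model measurements on the main thread ...
stop_event.set()
thread.join()
```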
144 changes: 144 additions & 0 deletions benchmark/benchmarks_entrypoint.py
@@ -0,0 +1,144 @@
import argparse
import importlib.util
import logging
import os
from typing import Dict
import psycopg2
import sys

from psycopg2.extras import Json
from psycopg2.extensions import register_adapter


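# Let psycopg2 serialize Python dicts (metadata, model measurements) as JSON query parameters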
register_adapter(dict, Json)


class ImportModuleException(Exception):
pass


class MetricsRecorder:
def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
self.conn = connection
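        # autocommit: each INSERT becomes visible immediately, with no explicit commit calls needed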
self.conn.autocommit = True
self.logger = logger
self.branch = branch
self.commit_id = commit_id
self.commit_msg = commit_msg

def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
"""
Creates a new benchmark, returns the benchmark id
"""
# gpu_name: str, model_id: str
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
(self.branch, self.commit_id, self.commit_msg, metadata),
)
benchmark_id = cur.fetchone()[0]
logger.debug(f"initialised benchmark #{benchmark_id}")
return benchmark_id

def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
"""
        Collect device metrics, such as CPU & GPU usage. These are "static", in the sense that you cannot pass arbitrary arguments to the function.
"""
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
)
self.logger.debug(
f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
)

def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
with self.conn.cursor() as cur:
cur.execute(
"""
INSERT INTO model_measurements (
benchmark_id,
measurements
) VALUES (%s, %s)
""",
(
benchmark_id,
measurements,
),
)
self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")

def close(self):
self.conn.close()


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)


def parse_arguments():
"""
Parse command line arguments for the benchmarking CLI.
"""
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")

parser.add_argument(
"branch",
type=str,
help="The branch name on which the benchmarking is performed.",
)

parser.add_argument(
"commit_id",
type=str,
help="The commit hash on which the benchmarking is performed.",
)

parser.add_argument(
"commit_msg",
type=str,
help="The commit message associated with the commit, truncated to 70 characters.",
)

args = parser.parse_args()

return args.branch, args.commit_id, args.commit_msg


def import_from_path(module_name, file_path):
try:
spec = importlib.util.spec_from_file_location(module_name, file_path)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
except Exception as e:
raise ImportModuleException(f"failed to load python module: {e}")


if __name__ == "__main__":
benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))

branch, commit_id, commit_msg = parse_arguments()

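    # Each .py file in this folder (except this entrypoint) is treated as a benchmark module and executed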
for entry in os.scandir(benchmarks_folder_path):
try:
if not entry.name.endswith(".py"):
continue
if entry.path == __file__:
continue
logger.debug(f"loading: {entry.name}")
module = import_from_path(entry.name.split(".")[0], entry.path)
logger.info(f"runnning benchmarks in: {entry.name}")
module.run_benchmark(logger, branch, commit_id, commit_msg)
except ImportModuleException as e:
logger.error(e)
except Exception as e:
logger.error(f"error running benchmarks for {entry.name}: {e}")