-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #62 from georgian-io/automated_deployment
Automated deployment
- Loading branch information
Showing
131 changed files
with
734 additions
and
15,239 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{
    "server": "tgi",
    "huggingface_repo": "NousResearch/Llama-2-7b-hf",
    "huggingface_token": "",
    "model_type": "llama",
    "model_name": "Llama-2-7b-hf",
    "task": "classification",
    "duration": "60s",
    "rate": "30"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Directory layout for benchmark artefacts.
BASE_DIR = "./benchmark_results"
PROCESSED_DIR = BASE_DIR + "/processed"
PLOTS_DIR = BASE_DIR + "/plots"
RAW_DIR = BASE_DIR + "/raw"
CONFIG_FILE_PATH = './config.json'

# Number of trailing characters to strip from a vegeta duration token.
# Each length includes the trailing comma, e.g. "34.5ms," "10m0s".
MILLISECONDS_LENGTH = 3
MICROSECONDS_LENGTH = 3
SECONDS_LENGTH = 2
MINUTES_LENGTH = 4

# Unit-conversion factors.
NUMBER_OF_MS_IN_SECOND = 1000
NUMBER_OF_MICROSEC_IN_SECOND = 1_000_000
NUMBER_OF_SECONDS_IN_MINUTE = 60

# HTTP status code indicating the server is rate-limiting requests.
TOO_MANY_REQUEST_ERROR = 429
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from enum import Enum | ||
|
||
class Server(Enum):
    """Inference-server backends supported by the benchmark runner."""

    TGI = "tgi"
    VLLM = "vllm"
    RAY = "ray"
    TRITON_VLLM = "triton_vllm"
|
||
class Task(Enum):
    """Benchmark task types a model can be evaluated on."""

    CLASSIFICATION = "classification"
    SUMMARIZATION = "summarization"
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import csv | ||
import os | ||
import numpy as np | ||
from enum_types import Server, Task | ||
from constants import MICROSECONDS_LENGTH, MILLISECONDS_LENGTH, SECONDS_LENGTH, MINUTES_LENGTH | ||
from constants import NUMBER_OF_MICROSEC_IN_SECOND, NUMBER_OF_MS_IN_SECOND, NUMBER_OF_SECONDS_IN_MINUTE | ||
from constants import TOO_MANY_REQUEST_ERROR | ||
import typer | ||
from utils import load_json | ||
from constants import CONFIG_FILE_PATH | ||
|
||
def save_data_for_final_table(csv_file_path, data):
    """Append one summary row to the final-results CSV.

    Writes the header row first when the file does not yet exist or is empty,
    so repeated calls accumulate rows under a single header.
    """
    column_names = ["model", "server", "rps", "latency_with_deviation",
                    "throughput_with_deviation", "duration_with_deviation"]

    # Header is needed only for a brand-new (or zero-byte) file.
    needs_header = not os.path.exists(csv_file_path) or os.path.getsize(csv_file_path) == 0

    with open(csv_file_path, mode='a', newline='') as out_file:
        csv_writer = csv.writer(out_file)
        if needs_header:
            csv_writer.writerow(column_names)
        csv_writer.writerow(data)
|
||
def convert_to_seconds(time):
    """Convert one vegeta duration token to seconds (float).

    Tokens come from str.split() on vegeta report lines, so they may carry a
    trailing comma, e.g. "34.5ms," "1.2s," "10m0s," — see the examples noted
    next to the *_LENGTH constants.

    Fixes the original fixed-width slicing, which dropped/mangled the seconds
    part of "<M>m<S>s" values (e.g. "1m30s," sliced to "1m3" and crashed, and
    a non-comma "10m0s" lost a digit).
    """
    token = time.rstrip(',')
    if token.endswith('µs'):
        return float(token[:-2]) / NUMBER_OF_MICROSEC_IN_SECOND
    if token.endswith('ms'):
        return float(token[:-2]) / NUMBER_OF_MS_IN_SECOND
    if token.endswith('s') and 'm' in token:
        # "<minutes>m<seconds>s" form, e.g. "10m0s" or "1m30s".
        minutes, _, seconds = token.partition('m')
        return float(minutes) * NUMBER_OF_SECONDS_IN_MINUTE + float(seconds[:-1])
    # Plain seconds, e.g. "1.5s".
    return float(token[:-1])
|
||
def get_metrics(raw_results_path: str, processed_results_path: str):
    """Parse raw vegeta benchmark logs and append aggregated metrics to a CSV.

    Args:
        raw_results_path: text file containing vegeta report output
            ('Requests', 'Duration' and 'Latencies' lines) for repeated runs.
        processed_results_path: CSV file one summary row is appended to.

    Reads model/server identity from the JSON config at CONFIG_FILE_PATH
    (keys "server" and "model_name" — verify against the config schema).
    """
    rate = 0
    config = load_json(CONFIG_FILE_PATH)
    server = config["server"]
    model = config["model_name"]
    with open(raw_results_path) as f:
        benchmark_logs = f.readlines()
    result_dict = {}
    max_total_request = 0
    raws = [line.split() for line in benchmark_logs]
    for raw in raws:
        if not raw:
            continue
        # str.split() yields strings, so the 429 status code must be compared
        # as a string — the original int comparison could never match.
        if raw[0] == str(TOO_MANY_REQUEST_ERROR):
            break
        if raw[0] == 'Requests':
            # Token positions in a vegeta "Requests [total, rate, throughput] ..." line.
            pos_of_total_request_value = 4
            pos_of_rate_value = 5
            pos_of_throughput_value = 6

            total_request = int(raw[pos_of_total_request_value][:-1])
            if result_dict.get(total_request) is None:
                result_dict[total_request] = {'latency': [],
                                              'throughput': [convert_to_seconds(raw[pos_of_throughput_value])],
                                              'count': 1,
                                              'duration': []}
            else:
                result_dict[total_request]['count'] += 1
                result_dict[total_request]['throughput'].append(convert_to_seconds(raw[pos_of_throughput_value]))
            max_total_request = total_request
            rate = float(raw[pos_of_rate_value][:-1])
        # 'Duration'/'Latencies' lines follow their 'Requests' line, so they
        # attach to the most recently seen request count.
        if raw[0] == 'Duration':
            pos_of_duration_value = 4
            result_dict[max_total_request]['duration'].append(convert_to_seconds(raw[pos_of_duration_value]))
        if raw[0] == 'Latencies':
            pos_of_latency_value = 11  # mean latency column in the Latencies line
            result_dict[max_total_request]['latency'].append(convert_to_seconds(raw[pos_of_latency_value]))

    keys_to_modify = ['latency', 'duration', 'throughput']

    for num_req in result_dict:
        for key in keys_to_modify:
            mean_value = np.mean(result_dict[num_req][key])
            std_deviation = np.std(result_dict[num_req][key])

            # Human-readable "mean±std" string for the final table; the raw
            # list is collapsed to its mean.
            result_dict[num_req][f"{key}_with_deviation"] = f"{mean_value:.3f}±{std_deviation:.3f}"
            result_dict[num_req][key] = mean_value

    # NOTE(review): assumes at least one 'Requests' line was parsed; an empty
    # log raises KeyError here, same as the original.
    save_data_for_final_table(processed_results_path,
                              [model, server, rate,
                               result_dict[max_total_request]['latency_with_deviation'],
                               result_dict[max_total_request]['throughput_with_deviation'],
                               result_dict[max_total_request]['duration_with_deviation']])
|
||
if __name__ == '__main__':

    # CLI entry point: typer maps get_metrics' two positional parameters
    # (raw_results_path, processed_results_path) to command-line arguments.
    typer.run(get_metrics)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
accelerate==0.21.0 | ||
bitsandbytes==0.39.0 | ||
datasets==2.12.0 | ||
evaluate==0.4.0 | ||
fastapi==0.104.1 | ||
numpy==1.24.3 | ||
pydantic==1.10.13 | ||
ray==2.8.0 | ||
starlette==0.27.0 | ||
tokenizers==0.14.1 | ||
torch==2.0.1 | ||
transformers==4.35.0 | ||
triton==2.0.0 | ||
tritonclient==2.39.0 | ||
typer==0.9.0 | ||
typing_extensions==4.8.0 | ||
uvicorn==0.23.2 | ||
vllm==0.2.1.post1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import subprocess | ||
from pathlib import Path | ||
import typer | ||
import json | ||
from utils import load_json | ||
from constants import PROCESSED_DIR, RAW_DIR, CONFIG_FILE_PATH | ||
from validation import validate_benchmark_config | ||
from validation import ValidationError | ||
|
||
def main():
    """Validate the benchmark config, prepare result directories and launch
    the benchmark shell script.

    Reads the JSON config at CONFIG_FILE_PATH; on validation failure the
    error is printed and nothing is run.
    """
    config = load_json(CONFIG_FILE_PATH)
    try:
        validate_benchmark_config(config)
    except ValidationError as e:
        print(f"An error occurred: {e}")
    else:

        Path(PROCESSED_DIR).mkdir(parents=True, exist_ok=True)
        Path(RAW_DIR).mkdir(parents=True, exist_ok=True)

        server = config["server"]
        model_name = config["model_name"]
        raw_results_path = f"{RAW_DIR}/{model_name}_{server}.txt"
        processed_results_path = f"{PROCESSED_DIR}/{model_name}.csv"

        # Stop any running containers before benchmarking. The original
        # `"docker stop $(docker ps -q)".split()` passed the `$(...)` shell
        # substitution as literal arguments (shell=False), so it never
        # stopped anything — resolve the container ids in Python instead.
        running = subprocess.run(["docker", "ps", "-q"],
                                 capture_output=True, text=True)
        container_ids = running.stdout.split()
        if container_ids:
            subprocess.run(["docker", "stop", *container_ids])

        subprocess.run(["chmod", "+x", "./script_benchmark.sh"])
        print("Running benchmark...")
        # str() guards against numeric JSON values — subprocess arguments
        # must be strings.
        subprocess.run(["./script_benchmark.sh", raw_results_path, processed_results_path,
                        str(config['duration']), str(config['rate'])])
        print("Benchmark is finished.")
        print(f"Raw results are saved at: {raw_results_path}")
        print(f"Processed results are saved at: {processed_results_path}")
|
||
if __name__ == "__main__":
    # Script entry point — run the benchmark workflow when executed directly.
    main()
Oops, something went wrong.