
Merge branch 'main' into zhenzhong/gguf
Zhenzhong1 committed Jan 9, 2024
2 parents 428d4c7 + f7428ce commit b19cee3
Showing 3 changed files with 526 additions and 7 deletions.
@@ -1,13 +1,31 @@
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import re
import sys
import os


def calculate_percentile(data, percentile):
return np.percentile(data, percentile, method="closest_observation")


def calculate_mean(data):
return np.mean(data)


def parse_output_file(file_path):
predictions = []
with open(file_path, 'r', encoding='UTF-8', errors='ignore') as file:
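For reference, the new calculate_percentile helper relies on NumPy's "closest_observation" method, which reports an actual observed sample rather than an interpolated value. A minimal sketch of that behavior (assuming NumPy >= 1.22, where the method keyword exists; the latency numbers below are made up):

import numpy as np

# Hypothetical per-token latencies in milliseconds (illustration only).
latencies_ms = [10.0, 12.0, 15.0, 40.0]

# "closest_observation" picks one of the measured samples, so the reported
# P90/P99 always corresponds to a latency that actually occurred.
p90 = np.percentile(latencies_ms, 90, method="closest_observation")
print("P90: {:.2f} ms".format(p90))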
@@ -17,6 +35,8 @@ def parse_output_file(file_path):
prediction_time = float(match.group(1)) # Assuming the prediction time is in the second column
predictions.append(prediction_time)
return predictions


def parse_memory_file(memory_file):
memory_values = []
if os.path.exists(memory_file):
@@ -44,14 +64,15 @@ def parse_memory_file(memory_file):
batch_size = sys.argv[5]
model_input = sys.argv[6]
model_output = sys.argv[7]
memory_file = os.environ.get("WORKING_DIR") + "/memory.txt"
memory_file = os.environ.get("WORKSPACE") + "/memory.txt"
predictions = parse_output_file(output_file)
assert len(predictions) > 0, "Model has no output tokens!"
first_token_latency = predictions[0]
p90 = calculate_percentile(predictions, 90)
p99 = calculate_percentile(predictions, 99)
latency_mean = calculate_mean(predictions[1:])
total_latency = np.sum(predictions)

print("P90: {:.2f} ms".format(p90))
print("P99: {:.2f} ms".format(p99))
print("average_latency: {:.2f} ms".format(latency_mean))
@@ -63,9 +84,10 @@ def parse_memory_file(memory_file):
memory_mean = calculate_mean(top_50_percent)

print("Memory Mean (Top 50%): {:.2f}".format(memory_mean))
-log_file = os.environ.get("WORKING_DIR") + "/cpp_graph_summary.log"
-link = os.environ.get("WORKING_DIR") + os.path.basename(output_file)
-with open (log_file, 'a') as f:
+log_file = os.environ.get("WORKSPACE") + "/cpp_graph_summary.log"
+log_prefix = os.environ.get("log_prefix")
+link = str(log_prefix) + os.path.basename(output_file)
+with open(log_file, 'a') as f:
f.write("engine,")
f.write("latency,")
f.write(model + ",")
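The definition of top_50_percent is collapsed in this view, so the exact selection is not visible in the diff. One plausible reading (an assumption, not confirmed by the visible lines) is that the largest half of the sampled memory values is kept before averaging, roughly as follows:

import numpy as np

# Hypothetical memory samples (e.g. MB) read from memory.txt.
memory_values = [1024.0, 1100.0, 980.0, 2048.0]

# Assumption: keep the largest 50% of the samples, then average them.
half = len(memory_values) // 2
top_50_percent = sorted(memory_values, reverse=True)[:half]
memory_mean = np.mean(top_50_percent)

print("Memory Mean (Top 50%): {:.2f}".format(memory_mean))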
@@ -81,8 +103,8 @@ def parse_memory_file(memory_file):
f.write(link + ",")
f.write("{:.2f},".format(p90))
f.write("{:.2f},".format(p99))
-#f.write(",latency:")
-#for latency in predictions:
+# f.write(",latency:")
+# for latency in predictions:
# f.write(",{:.2f}".format(latency))
f.write("\n")
f.close()
