
Merge branch 'main' into zhenzhong/gguf
Zhenzhong1 committed Jan 9, 2024
2 parents 428d4c7 + f7428ce commit b19cee3
Showing 3 changed files with 526 additions and 7 deletions.
@@ -1,13 +1,31 @@
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import re
import sys
import os


def calculate_percentile(data, percentile):
return np.percentile(data, percentile, method="closest_observation")


def calculate_mean(data):
return np.mean(data)


def parse_output_file(file_path):
predictions = []
with open(file_path, 'r', encoding='UTF-8', errors='ignore') as file:
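For reference, the new calculate_percentile helper relies on NumPy's "closest_observation" method, which reports an actual observed sample rather than an interpolated value. A minimal sketch of that behavior (assuming NumPy >= 1.22, where the method keyword exists; the latency numbers below are made up):

import numpy as np

# Hypothetical per-token latencies in milliseconds (illustration only).
latencies_ms = [10.0, 12.0, 15.0, 40.0]

# "closest_observation" picks one of the measured samples, so the reported
# P90/P99 always corresponds to a latency that actually occurred.
p90 = np.percentile(latencies_ms, 90, method="closest_observation")
print("P90: {:.2f} ms".format(p90))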
@@ -17,6 +35,8 @@ def parse_output_file(file_path):
prediction_time = float(match.group(1)) # Assuming the prediction time is in the second column
predictions.append(prediction_time)
return predictions


def parse_memory_file(memory_file):
memory_values = []
if os.path.exists(memory_file):
@@ -44,14 +64,15 @@ def parse_memory_file(memory_file):
batch_size = sys.argv[5]
model_input = sys.argv[6]
model_output = sys.argv[7]
memory_file = os.environ.get("WORKING_DIR") + "/memory.txt"
memory_file = os.environ.get("WORKSPACE") + "/memory.txt"
predictions = parse_output_file(output_file)
assert len(predictions) > 0, "Model has no output tokens!"
first_token_latency = predictions[0]
p90 = calculate_percentile(predictions, 90)
p99 = calculate_percentile(predictions, 99)
latency_mean = calculate_mean(predictions[1:])
total_latency = np.sum(predictions)

print("P90: {:.2f} ms".format(p90))
print("P99: {:.2f} ms".format(p99))
print("average_latency: {:.2f} ms".format(latency_mean))
@@ -63,9 +84,10 @@ def parse_memory_file(memory_file):
memory_mean = calculate_mean(top_50_percent)

print("Memory Mean (Top 50%): {:.2f}".format(memory_mean))
-log_file = os.environ.get("WORKING_DIR") + "/cpp_graph_summary.log"
-link = os.environ.get("WORKING_DIR") + os.path.basename(output_file)
-with open (log_file, 'a') as f:
+log_file = os.environ.get("WORKSPACE") + "/cpp_graph_summary.log"
+log_prefix = os.environ.get("log_prefix")
+link = str(log_prefix) + os.path.basename(output_file)
+with open(log_file, 'a') as f:
f.write("engine,")
f.write("latency,")
f.write(model + ",")
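The definition of top_50_percent is collapsed in this view, so the exact selection is not visible in the diff. One plausible reading (an assumption, not confirmed by the visible lines) is that the largest half of the sampled memory values is kept before averaging, roughly as follows:

import numpy as np

# Hypothetical memory samples (e.g. MB) read from memory.txt.
memory_values = [1024.0, 1100.0, 980.0, 2048.0]

# Assumption: keep the largest 50% of the samples, then average them.
half = len(memory_values) // 2
top_50_percent = sorted(memory_values, reverse=True)[:half]
memory_mean = np.mean(top_50_percent)

print("Memory Mean (Top 50%): {:.2f}".format(memory_mean))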
@@ -81,8 +103,8 @@ def parse_memory_file(memory_file):
f.write(link + ",")
f.write("{:.2f},".format(p90))
f.write("{:.2f},".format(p99))
-#f.write(",latency:")
-#for latency in predictions:
+# f.write(",latency:")
+# for latency in predictions:
# f.write(",{:.2f}".format(latency))
f.write("\n")
f.close()
