Skip to content

Commit

Permalink
add multi level log
Browse files Browse the repository at this point in the history
  • Loading branch information
Yunnglin committed Dec 27, 2024
1 parent fc5574d commit aad5561
Showing 1 changed file with 14 additions and 12 deletions.
26 changes: 14 additions & 12 deletions evalscope/collections/evaluator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import os
import pandas as pd
from collections import OrderedDict, defaultdict
from collections import defaultdict
from tabulate import tabulate
from tqdm import tqdm

Expand Down Expand Up @@ -92,34 +92,36 @@ def get_report(self, reviews):
})

df = pd.DataFrame(data)
# Explode tags to multiple rows
df_exploded = df.explode('tags')

# Helper function for aggregation and sorting
def aggregate_and_sort(df, group_by_cols):
report_df = df.groupby(group_by_cols).agg(
average_score=('score', 'mean'), count=('score', 'size')).reset_index()
return report_df.sort_values(by='count', ascending=False)
report_df = df.groupby(group_by_cols) \
.agg(average_score=('score', 'mean'), count=('score', 'size')) \
.reset_index() \
.sort_values(by='count', ascending=False) \
.to_dict(orient='records')
return report_df

# Multi-level aggregation
subset_report_df = aggregate_and_sort(df, ['task_type', 'dataset_name', 'subset_name'])
dataset_report_df = aggregate_and_sort(df, ['task_type', 'dataset_name'])
task_report_df = aggregate_and_sort(df, ['task_type'])

# Explode tags and aggregate
df_exploded = df.explode('tags')
tag_report_df = aggregate_and_sort(df_exploded, ['tags'])

# Convert sorted DataFrames to Dict
report = {
'subset_level': subset_report_df.to_dict(orient='records'),
'dataset_level': dataset_report_df.to_dict(orient='records'),
'task_level': task_report_df.to_dict(orient='records'),
'tag_level': tag_report_df.to_dict(orient='records')
'subset_level': subset_report_df,
'dataset_level': dataset_report_df,
'task_level': task_report_df,
'tag_level': tag_report_df
}

# Log the report
for level, data in report.items():
table = tabulate(data, headers='keys', tablefmt='pretty', showindex=False)
logger.info(f"{level} Report:\n{table}")
logger.info(f'{level} Report:\n{table}')

# Save the report to a JSON file
report_file_path = os.path.join(self.outputs.reports_dir, 'data_collection.json')
Expand Down

0 comments on commit aad5561

Please sign in to comment.