add multi level log
Yunnglin committed Dec 27, 2024
1 parent 08240b1 commit fc5574d
Showing 3 changed files with 33 additions and 16 deletions.
34 changes: 21 additions & 13 deletions evalscope/collections/evaluator.py
@@ -1,8 +1,8 @@
 import json
 import os
 import pandas as pd
-from collections import defaultdict
-from datetime import datetime
+from collections import OrderedDict, defaultdict
+from tabulate import tabulate
 from tqdm import tqdm

 from evalscope.benchmarks import Benchmark
@@ -93,25 +93,33 @@ def get_report(self, reviews):

         df = pd.DataFrame(data)

-        # Multi-level aggregation
-        subset_report_df = df.groupby(['task_type', 'dataset_name', 'subset_name']).agg(
-            average_score=('score', 'mean'), count=('score', 'size')).reset_index()
+        # Helper function for aggregation and sorting
+        def aggregate_and_sort(df, group_by_cols):
+            report_df = df.groupby(group_by_cols).agg(
+                average_score=('score', 'mean'), count=('score', 'size')).reset_index()
+            return report_df.sort_values(by='count', ascending=False)

-        dataset_report_df = df.groupby(['task_type', 'dataset_name']).agg(
-            average_score=('score', 'mean'), count=('score', 'size')).reset_index()
+        # Multi-level aggregation
+        subset_report_df = aggregate_and_sort(df, ['task_type', 'dataset_name', 'subset_name'])
+        dataset_report_df = aggregate_and_sort(df, ['task_type', 'dataset_name'])
+        task_report_df = aggregate_and_sort(df, ['task_type'])

-        task_report_df = df.groupby(['task_type']).agg(
-            average_score=('score', 'mean'), count=('score', 'size')).reset_index()
+        # Explode tags and aggregate
+        df_exploded = df.explode('tags')
+        tag_report_df = aggregate_and_sort(df_exploded, ['tags'])

-        # Combine all reports into a single dictionary
+        # Convert sorted DataFrames to Dict
         report = {
             'subset_level': subset_report_df.to_dict(orient='records'),
             'dataset_level': dataset_report_df.to_dict(orient='records'),
-            'task_level': task_report_df.to_dict(orient='records')
+            'task_level': task_report_df.to_dict(orient='records'),
+            'tag_level': tag_report_df.to_dict(orient='records')
         }

         # Log the report
-        logger.info(f"Report:\n{pd.DataFrame(report['subset_level']).to_markdown(index=False)}")
+        for level, data in report.items():
+            table = tabulate(data, headers='keys', tablefmt='pretty', showindex=False)
+            logger.info(f"{level} Report:\n{table}")

         # Save the report to a JSON file
         report_file_path = os.path.join(self.outputs.reports_dir, 'data_collection.json')
@@ -140,7 +148,7 @@ def get_reviews(self, answers):

     @staticmethod
     def get_pred_score(review_d) -> float:
-        return review_d[AnswerKeys.CHOICES][0][ReviewKeys.REVIEW][ReviewKeys.RESULT]
+        return float(review_d[AnswerKeys.CHOICES][0][ReviewKeys.REVIEW][ReviewKeys.RESULT])

     def eval(self, **kwargs):
         answers = self.get_answers()
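For context, the new aggregate_and_sort helper and the per-level tabulate logging from the diff above can be exercised on a toy review DataFrame. This is a minimal standalone sketch, not a call into the evalscope pipeline: the helper body is copied from the patch, while the column values and sample rows are invented for illustration.

import pandas as pd
from tabulate import tabulate

# Toy review data with the same columns the evaluator aggregates over (values are made up).
data = [
    {'task_type': 'qa', 'dataset_name': 'arc', 'subset_name': 'easy', 'tags': ['reasoning', 'en'], 'score': 1.0},
    {'task_type': 'qa', 'dataset_name': 'arc', 'subset_name': 'hard', 'tags': ['reasoning', 'en'], 'score': 0.0},
    {'task_type': 'math', 'dataset_name': 'gsm8k', 'subset_name': 'main', 'tags': ['math', 'en'], 'score': 1.0},
]
df = pd.DataFrame(data)

def aggregate_and_sort(df, group_by_cols):
    # Mean score and sample count per group, largest groups first (as in the patch).
    report_df = df.groupby(group_by_cols).agg(
        average_score=('score', 'mean'), count=('score', 'size')).reset_index()
    return report_df.sort_values(by='count', ascending=False)

report = {
    'subset_level': aggregate_and_sort(df, ['task_type', 'dataset_name', 'subset_name']).to_dict(orient='records'),
    'dataset_level': aggregate_and_sort(df, ['task_type', 'dataset_name']).to_dict(orient='records'),
    'task_level': aggregate_and_sort(df, ['task_type']).to_dict(orient='records'),
    # explode('tags') turns one row per sample into one row per (sample, tag) pair before grouping.
    'tag_level': aggregate_and_sort(df.explode('tags'), ['tags']).to_dict(orient='records'),
}

for level, records in report.items():
    print(f"{level} Report:\n{tabulate(records, headers='keys', tablefmt='pretty', showindex=False)}")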
12 changes: 10 additions & 2 deletions evalscope/collections/schema.py
@@ -62,14 +62,22 @@ def from_dict(cls, data):
                 instance.datasets.append(DatasetInfo(**dataset))
         return instance

-    def flatten(self) -> List[DatasetInfo]:
+    def flatten(self, parent_names=None) -> List[DatasetInfo]:
+        if parent_names is None:
+            parent_names = []
+
         flat_datasets = []
+        current_names = parent_names + [self.name]
+
         for dataset in self.datasets:
             if isinstance(dataset, CollectionSchema):
-                nested_datasets = dataset.flatten()
+                nested_datasets = dataset.flatten(current_names)
                 flat_datasets.extend(nested_datasets)
             else:
+                # Add all parent CollectionSchema names to the tags of each DatasetInfo
+                for name in current_names:
+                    if name not in dataset.tags:
+                        dataset.tags.append(name)
                 flat_datasets.append(dataset)
         return flat_datasets

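As a rough illustration of the tag propagation that the patched flatten performs, the sketch below reproduces the same recursion on simplified stand-in dataclasses. These are not the real CollectionSchema/DatasetInfo definitions from evalscope; only the fields needed for the example (name, tags, datasets) are assumed.

from dataclasses import dataclass, field
from typing import List, Union

@dataclass
class DatasetInfo:  # simplified stand-in, not evalscope's class
    name: str
    tags: List[str] = field(default_factory=list)

@dataclass
class CollectionSchema:  # simplified stand-in, not evalscope's class
    name: str
    datasets: List[Union['CollectionSchema', DatasetInfo]] = field(default_factory=list)

    def flatten(self, parent_names=None) -> List[DatasetInfo]:
        # Same shape as the patched method: carry every enclosing collection
        # name down the tree and append it to each leaf dataset's tags.
        if parent_names is None:
            parent_names = []
        flat_datasets = []
        current_names = parent_names + [self.name]
        for dataset in self.datasets:
            if isinstance(dataset, CollectionSchema):
                flat_datasets.extend(dataset.flatten(current_names))
            else:
                for name in current_names:
                    if name not in dataset.tags:
                        dataset.tags.append(name)
                flat_datasets.append(dataset)
        return flat_datasets

schema = CollectionSchema('root', [CollectionSchema('math', [DatasetInfo('gsm8k')]), DatasetInfo('arc')])
for d in schema.flatten():
    print(d.name, d.tags)  # gsm8k ['root', 'math'], then arc ['root']

This is what enables the new 'tag_level' report in evaluator.py: every flattened DatasetInfo carries the names of all collections it was nested under as tags.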
3 changes: 2 additions & 1 deletion tests/cli/test_collection.py
@@ -48,7 +48,8 @@ def test_evaluate_collection(self):
            eval_type=EvalType.SERVICE,
            datasets=['data_collection'],
            dataset_args={'data_collection': {
-                'local_path': 'outputs/mixed_data_test.jsonl'
+                # 'local_path': 'outputs/mixed_data_test.jsonl'
+                'local_path': 'outputs/mixed_data.jsonl'
            }},
        )
        run_task(task_cfg=task_cfg)
