Skip to content

Commit

Permalink
fix bug and warning in data diagnosis
Browse files: browse the repository at this point in the history
  • Loading branch information
yukirora committed Aug 13, 2024
1 parent 7435f10 commit 86a25ec
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 11 deletions.
2 changes: 1 addition & 1 deletion superbench/analyzer/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def aggregate(raw_data_df, pattern=None):
match = re.search(pattern, metric)
if match:
metric_in_list = list(metric)
for i in range(1, len(match.groups()) + 1):
for i in range(len(match.groups()) , 0, -1):
metric_in_list[match.start(i):match.end(i)] = '*'
short = ''.join(metric_in_list)
if short not in metric_store:
Expand Down
11 changes: 5 additions & 6 deletions superbench/analyzer/data_diagnosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,8 @@ def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
all_data_df = data_not_accept_df[[
append_columns[index]
]].merge(all_data_df, left_index=True, right_index=True, how='right')
all_data_df['Accept'] = all_data_df['Accept'].replace(np.nan, True)
all_data_df['Number Of Issues'] = all_data_df['Number Of Issues'].replace(np.nan, 0)
all_data_df['Number Of Issues'] = all_data_df['Number Of Issues'].astype(int)
all_data_df['Accept'] = all_data_df['Accept'].replace(np.nan, 1).astype('bool')
all_data_df['Number Of Issues'] = all_data_df['Number Of Issues'].replace(np.nan, 0).astype('int')

return all_data_df

Expand Down Expand Up @@ -296,7 +295,7 @@ def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
data_not_accept_df (DataFrame): the DataFrame to output
output_path (str): the path of output jsonl file
"""
data_not_accept_df = data_not_accept_df.convert_dtypes().astype('object').fillna(self.na)
data_not_accept_df = data_not_accept_df.convert_dtypes().astype('object').infer_objects().fillna(self.na)
p = Path(output_path)
try:
data_not_accept_json = data_not_accept_df.to_json(orient='index')
Expand Down Expand Up @@ -327,7 +326,7 @@ def output_diagnosis_in_json(self, data_not_accept_df, output_path):
data_not_accept_df (DataFrame): the DataFrame to output
output_path (str): the path of output jsonl file
"""
data_not_accept_df = data_not_accept_df.convert_dtypes().astype('object').fillna(self.na)
data_not_accept_df = data_not_accept_df.convert_dtypes().astype('object').infer_objects().fillna(self.na)
data_not_accept_df = data_not_accept_df.reset_index()
data_not_accept_df = data_not_accept_df.rename(
columns={
Expand Down Expand Up @@ -378,7 +377,7 @@ def generate_md_lines(self, data_not_accept_df, rules, round):
data_not_accept_df = data_analysis.round_significant_decimal_places(
data_not_accept_df, round, [metric]
)
data_not_accept_df = data_not_accept_df.convert_dtypes().astype('object').fillna(self.na)
data_not_accept_df = data_not_accept_df.convert_dtypes().astype('object').infer_objects().fillna(self.na)
lines = file_handler.generate_md_table(data_not_accept_df, header)
return lines

Expand Down
10 changes: 6 additions & 4 deletions superbench/analyzer/diagnosis_rule_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,17 +239,19 @@ def failure_check(data_row, rule, summary_data_row, details, categories, raw_rul
violated_metric_num = 0
for metric_regex in raw_rule['metrics']:
match = False
violate = False
for metric in rule['metrics']:
if re.search(metric_regex, metric):
match = True
# metric not in raw_data or the value is none, miss test
if metric not in data_row or pd.isna(data_row[metric]):
violated_metric_num += 1
break
if RuleOp.miss_test(metric, rule, data_row, details, categories):
violate = True
# metric_regex written in rules is not matched by any metric, miss test
if not match:
violated_metric_num += 1
violate = True
RuleOp.add_categories_and_details(metric_regex + '_miss', rule['categories'], details, categories)
if violate:
violated_metric_num += 1
# return code != 0, failed test
violated_metric_num += RuleOp.value(data_row, rule, summary_data_row, details, categories)
return violated_metric_num
Expand Down

0 comments on commit 86a25ec

Please sign in to comment.