Skip to content

Commit

Permalink
Merge pull request #101 from meedan/CV2-4801
Browse files Browse the repository at this point in the history
double check returned categories to make sure they are in-schema + tests
  • Loading branch information
ashkankzme authored Aug 6, 2024
2 parents 90893fc + d00bad4 commit a708313
Show file tree
Hide file tree
Showing 2 changed files with 312 additions and 3 deletions.
20 changes: 17 additions & 3 deletions lib/model/classycat_classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,25 @@ def classify_and_store_results(self, schema_id, items):
logger.info(f"Classification results: {classification_results}")
raise Exception(f"Not all items were classified successfully: "
f"input length {len(items)}, output length {len(classification_results)}")
# TODO: validate response label against schema https://meedan.atlassian.net/browse/CV2-4801

final_results = [{'id': items[i]['id'], 'text': items[i]['text'], 'labels': classification_results[i]}
for i in range(len(items))]
results_file_id = str(uuid.uuid4())
upload_file_to_s3(self.output_bucket, f"{schema_id}/{results_file_id}.json", json.dumps(final_results))

# filtering out the results that have out-of-schema labels
# out-of-schema labels will not be included in the final results,
# and items with no labels can be retried later by the user, indicated by an empty list for labels
permitted_labels = [topic['topic'] for topic in schema['topics']] + ['Other', 'Unsure']
for result in final_results:

# log the items that had at least one out-of-schema label
if not all([label in permitted_labels for label in result['labels']]):
logger.error(f"Item {result['id']} had out-of-schema labels: {result['labels']}, permitted labels: {permitted_labels}")

result['labels'] = [label for label in result['labels'] if label in permitted_labels]

if not all([len(result['labels']) == 0 for result in final_results]):
results_file_id = str(uuid.uuid4())
upload_file_to_s3(self.output_bucket, f"{schema_id}/{results_file_id}.json", json.dumps(final_results))

return final_results

Expand Down
Loading

0 comments on commit a708313

Please sign in to comment.