Skip to content

Commit

Permalink
Merge pull request #159 from IndicoDataSolutions/unbundling_metrics
Browse files Browse the repository at this point in the history
adding unbundling metrics support
  • Loading branch information
Scott771 authored Oct 2, 2023
2 parents f140b54 + bf726d1 commit 7970e80
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 0 deletions.
126 changes: 126 additions & 0 deletions indico_toolkit/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,26 @@


class ExtractionMetrics(IndicoWrapper):
"""
Example usage:
metrics = ExtractionMetrics(client)
metrics.get_extraction_metrics(MODEL_GROUP_ID)
# get a pandas dataframe of field level results
df = metrics.get_metrics_df()
print(df.head())
# get metrics for a specific span type and/or model ID
df = metrics.get_metrics_df(span_type="exact", select_model_id=102)
print(df.head())
# write the results to a CSV (can also optionally pass span_type/model ID here as well)
metrics.to_csv("./my_metrics.pdf")
# get an interactive bar plot to visualize model improvement over time
metrics.bar_plot("./my_bar_plot.html")
"""
def __init__(self, client: IndicoClient):
self.client = client
self.raw_metrics: List[dict] = None
Expand Down Expand Up @@ -169,6 +189,112 @@ def to_csv(
df.to_csv(output_path, index=False)



class UnbundlingMetrics(ExtractionMetrics):
    """
    Metrics for Unbundling models.

    Example Usage:
    um = UnbundlingMetrics(client)
    um.get_metrics(1232)
    um.line_plot("./my_metric_plot.html", metric="recall", plot_title="Insurance Model Recall Improvement")
    """
    def get_metrics(self, model_group_id: int):
        """
        Collect all metrics available based on a Model Group ID for an Unbundling model
        Args:
            model_group_id (int): Model Group ID that you're interested in (available within the Explain UI)
        Raises:
            ToolkitInputError: if no models are associated with the given model group ID
        """
        results = self.graphQL_request(METRIC_QUERY, {"modelGroupId": model_group_id})
        if len(results["modelGroups"]["modelGroups"]) == 0:
            raise ToolkitInputError(
                f"There are no models associated with ID: {model_group_id}"
            )
        results = results["modelGroups"]["modelGroups"][0]["models"]
        raw_metrics = []
        included_models = []
        labeled_samples = []
        for r in results:
            model_info = json.loads(r["modelInfo"])
            if "total_number_of_examples" not in model_info or "metrics" not in model_info:
                # some dictionaries don't come back with required fields...
                continue
            labeled_samples.append(model_info["total_number_of_examples"])
            included_models.append(r["id"])
            raw_metrics.append(model_info["metrics"]["per_class_metrics"])
        self.raw_metrics = raw_metrics
        self.included_models = included_models
        # map of model ID -> number of labeled samples that model was trained on
        self.number_of_samples = dict(zip(included_models, labeled_samples))


    def get_metrics_df(self) -> pd.DataFrame:
        """
        Return a DataFrame with one row per (model, field): the page-level scores for
        that field plus 'field_name', 'model_id' and 'number_of_samples' columns,
        sorted descending by field name then model ID.
        Call get_metrics() first to populate the underlying data.
        """
        cleaned_metrics = []
        for model_id, metrics in zip(self.included_models, self.raw_metrics):
            for class_name in metrics:
                # copy so we annotate the row without mutating self.raw_metrics in place
                scores = dict(metrics[class_name]["page"])
                scores["field_name"] = class_name
                scores["model_id"] = model_id
                scores["number_of_samples"] = self.number_of_samples[model_id]
                cleaned_metrics.append(scores)
        df = pd.DataFrame(cleaned_metrics)
        return df.sort_values(by=["field_name", "model_id"], ascending=False)


    def line_plot(
        self,
        output_path: str,
        metric: str = "f1_score",
        plot_title: str = "",
        ids_to_exclude: List[int] = None,
        fields_to_exclude: List[str] = None,
    ):
        """
        Write an html line plot to disk with # of samples on x-axis, a metric on the y-axis and
        each line representing a distinct field.
        Will also open the plot automatically in your browser, where you will have interactive
        functionality and the ability to download a copy as a PNG as well.
        Args:
            output_path (str): where you want to write plot, e.g. "./myplot.html"
            metric (str, optional): possible values are 'precision', 'recall', 'f1_score', 'false_positives',
                                    'false_negatives', 'true_positives'. Defaults to "f1_score".
            plot_title (str, optional): Title of the plot. Defaults to "".
            ids_to_exclude (List[int], optional): Model Ids to exclude from plot.
            fields_to_exclude (List[str], optional): Field Names to exclude from plot.
        """
        # avoid mutable default arguments; None means "exclude nothing"
        ids_to_exclude = ids_to_exclude or []
        fields_to_exclude = fields_to_exclude or []
        df = self.get_metrics_df()
        if ids_to_exclude:
            df = df.drop(df.loc[df["model_id"].isin(ids_to_exclude)].index)
        if fields_to_exclude:
            df = df.drop(df.loc[df["field_name"].isin(fields_to_exclude)].index)
        df = df.sort_values(by=["field_name", "number_of_samples", metric])
        plotting = Plotting()
        for field in sorted(df["field_name"].unique()):
            sub_df = df.loc[df["field_name"] == field].copy()
            # ensure only one value per # of samples
            sub_df = sub_df.drop_duplicates(subset=["number_of_samples"])
            plotting.add_line_data(
                sub_df["number_of_samples"],
                sub_df[metric],
                name=field,
                color=None,
            )
        plotting.define_layout(
            xaxis_title="Number of Samples",
            legend_title="Field",
            plot_title=plot_title,
            yaxis_title=metric,
        )
        plotting.plot(output_path)

    def bar_plot(self):
        # inherited from ExtractionMetrics but not supported for unbundling models
        raise NotImplementedError("Bar Plot is not currently implemented for unbundling")

    def get_extraction_metrics(self, model_group_id: int):
        # inherited from ExtractionMetrics; use get_metrics() for unbundling models
        raise NotImplementedError("Not available for unbundling class")


METRIC_QUERY = """
query modelGroupMetrics($modelGroupId: Int!){
modelGroups(
Expand Down
3 changes: 3 additions & 0 deletions tests/indico_wrapper/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Test Datasets class methods
"""
import pytest
from indico_toolkit.indico_wrapper import Datasets
from indico.types import Dataset
Expand Down

2 comments on commit 7970e80

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test Coverage

Indico Toolkit Coverage Report
FileStmtsMissCoverMissing
indico_toolkit
   errors.py14193%22
indico_toolkit/association
   association.py35294%20, 40
   extracted_tokens.py58198%88
   line_items.py92397%151–152, 164
   positioning.py118298%231, 251
   split_merged_values.py24196%49
indico_toolkit/auto_class
   classifier.py77791%62, 65–69, 151
indico_toolkit/auto_review
   auto_reviewer.py24292%65–66
   review_config.py15287%32, 36
indico_toolkit/highlighter
   highlighter.py1291191%39, 50, 131, 208, 242–248
indico_toolkit/indico_wrapper
   dataset.py34779%42–45, 56, 98–99
   doc_extraction.py34197%53
   download.py50394%46, 104, 167
   indico_wrapper.py32584%57–59, 109, 112
   reviewer.py27485%42–43, 51–52
   workflow.py791976%42, 76, 88–93, 137–142, 144, 149, 207–210
indico_toolkit/metrics
   compare_ground_truth.py66494%30, 37, 92, 94
   compare_models.py631084%57, 102–114, 125, 128, 134
   metrics.py1187041%42, 68, 109–134, 160–183, 215–237, 243–252, 277–300, 303, 308
   plotting.py15287%66, 80
indico_toolkit/ocr
   customocr_object.py23387%25, 29, 41
   ondoc_object.py41295%81, 92
indico_toolkit/pipelines
   file_processing.py90397%66, 70, 106
   pdf_manipulation.py33488%16–18, 63
indico_toolkit/snapshots
   snapshot.py1551690%92, 147–148, 185, 263, 281, 284–288, 295–296, 302–303, 307–308
indico_toolkit/staggered_loop
   metrics.py4784398%25–27, 41–45, 59–62, 76–79, 93–96, 114–115, 132–151, 172–222, 250–290, 308, 326–328, 347–349, 369–387, 403–407, 436–450, 474–488, 518–530, 552–565, 597–609, 633–637, 719–821, 839, 862–868, 888–902, 922, 947–958, 984–1002, 1024–1029, 1053–1060, 1101–1226, 1273–1433
   staggered_loop.py23916133%76–78, 81–104, 107–146, 157–173, 194–221, 248–290, 312–325, 378–436, 450–503, 525–532, 535–549, 565–654, 663–679
indico_toolkit/structure
   create_structure.py58580%1–206
   utils.py990%1–13
indico_toolkit/types
   classification.py43198%75
   extractions.py115497%151, 166, 169, 179
   workflow_object.py64789%29, 86, 90, 94, 98, 102, 106
TOTAL260686467% 

Tests Skipped Failures Errors Time
238 0 💤 0 ❌ 0 🔥 3m 23s ⏱️

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test Coverage

Indico Toolkit Coverage Report
FileStmtsMissCoverMissing
indico_toolkit
   errors.py14193%22
indico_toolkit/association
   association.py35294%20, 40
   extracted_tokens.py58198%88
   line_items.py92397%151–152, 164
   positioning.py118298%231, 251
   split_merged_values.py24196%49
indico_toolkit/auto_class
   classifier.py77791%62, 65–69, 151
indico_toolkit/auto_review
   auto_reviewer.py24292%65–66
   review_config.py15287%32, 36
indico_toolkit/highlighter
   highlighter.py1291191%39, 50, 131, 208, 242–248
indico_toolkit/indico_wrapper
   dataset.py34779%42–45, 56, 98–99
   doc_extraction.py34197%53
   download.py50394%46, 104, 167
   indico_wrapper.py32584%57–59, 109, 112
   reviewer.py27485%42–43, 51–52
   workflow.py791976%42, 76, 88–93, 137–142, 144, 149, 207–210
indico_toolkit/metrics
   compare_ground_truth.py66494%30, 37, 92, 94
   compare_models.py631084%57, 102–114, 125, 128, 134
   metrics.py1187041%42, 68, 109–134, 160–183, 215–237, 243–252, 277–300, 303, 308
   plotting.py15287%66, 80
indico_toolkit/ocr
   customocr_object.py23387%25, 29, 41
   ondoc_object.py41295%81, 92
indico_toolkit/pipelines
   file_processing.py90397%66, 70, 106
   pdf_manipulation.py33488%16–18, 63
indico_toolkit/snapshots
   snapshot.py1551690%92, 147–148, 185, 263, 281, 284–288, 295–296, 302–303, 307–308
indico_toolkit/staggered_loop
   metrics.py4784398%25–27, 41–45, 59–62, 76–79, 93–96, 114–115, 132–151, 172–222, 250–290, 308, 326–328, 347–349, 369–387, 403–407, 436–450, 474–488, 518–530, 552–565, 597–609, 633–637, 719–821, 839, 862–868, 888–902, 922, 947–958, 984–1002, 1024–1029, 1053–1060, 1101–1226, 1273–1433
   staggered_loop.py23916133%76–78, 81–104, 107–146, 157–173, 194–221, 248–290, 312–325, 378–436, 450–503, 525–532, 535–549, 565–654, 663–679
indico_toolkit/structure
   create_structure.py58580%1–206
   utils.py990%1–13
indico_toolkit/types
   classification.py43198%75
   extractions.py115497%151, 166, 169, 179
   workflow_object.py64789%29, 86, 90, 94, 98, 102, 106
TOTAL260686467% 

Tests Skipped Failures Errors Time
238 0 💤 0 ❌ 0 🔥 3m 13s ⏱️

Please sign in to comment.