add evaluation tests
bw4sz committed Nov 17, 2024
1 parent 0145098 commit c9a5ba3
Showing 6 changed files with 93 additions and 30 deletions.
8 changes: 7 additions & 1 deletion src/data_processing.py
@@ -89,7 +89,7 @@ def preprocess_images(
allow_empty = True
else:
allow_empty = False

crop_annotation = process_image(
image_path=image_path,
annotation_df=annotation_df,
@@ -141,6 +141,12 @@ def process_image(

full_path = os.path.join(root_dir, image_path)

+    # Check if all xmin values are 0, indicating empty annotations
+    if annotation_df is not None and all(annotation_df['xmin'] == 0):
+        allow_empty = True
+    else:
+        allow_empty = False

crop_annotation = preprocess.split_raster(
path_to_raster=full_path,
annotations_file=annotation_df,
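The new block encodes a convention used throughout this commit: an image with no objects is represented by an annotation row whose box coordinates are all zero. A minimal sketch of that convention, runnable on its own (the helper name infer_allow_empty is illustrative, not part of the repo):

    import pandas as pd

    def infer_allow_empty(annotation_df):
        # Assumed convention: an all-zero 'xmin' column marks an empty image
        if annotation_df is None:
            return False
        return bool((annotation_df['xmin'] == 0).all())

    # An "empty" image carries a single all-zero box row:
    empty = pd.DataFrame({'image_path': ['empty.jpg'],
                          'xmin': [0], 'ymin': [0], 'xmax': [0], 'ymax': [0]})
    assert infer_allow_empty(empty) is True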
7 changes: 2 additions & 5 deletions src/model.py
@@ -6,13 +6,11 @@
import warnings
from logging import warn
import math
from datetime import datetime

# Third party imports
import dask.array as da
import pandas as pd
from deepforest import main, visualize
from deepforest.utilities import read_file
from pytorch_lightning.loggers import CometLogger

# Local imports
@@ -87,7 +85,6 @@ def create_train_test(annotations, train_test_split = 0.1):
pd.DataFrame: A DataFrame containing training annotations.
pd.DataFrame: A DataFrame containing validation annotations.
"""
-    tmpdir = tempfile.gettempdir()
# split train images into 90% train and 10% validation for each class as much as possible
test_images = []
validation_df = None
@@ -164,7 +161,7 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro
model.create_trainer()

with comet_logger.experiment.context_manager("train_images"):
-        non_empty_train_annotations = train_annotations[train_annotations.xmax.notnull()]
+        non_empty_train_annotations = train_annotations[~(train_annotations.xmax==0)]
if non_empty_train_annotations.empty:
pass
else:
@@ -178,7 +175,7 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro
model.trainer.fit(model)

with comet_logger.experiment.context_manager("post-training prediction"):
-        for image_path in test_annotations.image_path.sample(5):
+        for image_path in test_annotations.image_path.head(5):
prediction = model.predict_image(path = os.path.join(train_image_dir, image_path))
if prediction is None:
continue
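Both edits above follow from the same all-zero-box convention, plus a determinism fix. A hedged before/after sketch in plain pandas (the two-row frame is assumed data):

    import pandas as pd

    annotations = pd.DataFrame({
        'image_path': ['empty.jpg', 'birds.jpg'],
        'xmin': [0, 200], 'ymin': [0, 300], 'xmax': [0, 300], 'ymax': [0, 400],
    })

    # Empty images have xmax == 0, not NaN, so notnull() kept them;
    # ~(xmax == 0) drops them as intended.
    non_empty = annotations[~(annotations.xmax == 0)]
    assert non_empty.image_path.tolist() == ['birds.jpg']

    # head(5) is deterministic and safe on short frames, while sample(5)
    # is random and raises ValueError when fewer than 5 rows exist.
    preview = annotations.image_path.head(5)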
2 changes: 1 addition & 1 deletion src/pipeline.py
@@ -37,7 +37,7 @@ def run(self):
label_propagator.through_time(new_annotations)

if self.config.train.validation_csv_path is not None:
-            validation_df = pd.read_csv(self.config.validation_csv_path)
+            validation_df = pd.read_csv(self.config.train.validation_csv_path)
else:
validation_df = None

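The one-line fix reads the validation CSV path from the nested train section of the config, which is where tests/conftest.py (below) sets it. A small sketch assuming an OmegaConf-style config, as the attribute access suggests:

    from omegaconf import OmegaConf

    cfg = OmegaConf.create({'train': {'validation_csv_path': '/tmp/validation.csv'}})

    # The value lives one level down, under cfg.train:
    assert cfg.train.validation_csv_path == '/tmp/validation.csv'
    # cfg.validation_csv_path would look at the top level and find nothing.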
39 changes: 28 additions & 11 deletions src/pipeline_evaluation.py
@@ -6,11 +6,12 @@
import pandas as pd

class PipelineEvaluation:
-    def __init__(self, model, detect_ground_truth_dir=None, classify_confident_ground_truth_dir=None, classify_uncertain_ground_truth_dir=None, detection_true_positive_threshold=0.8, detection_false_positive_threshold=0.5, classification_avg_score=0.5, target_labels=None, patch_size=450, patch_overlap=0, min_score=0.5):
+    def __init__(self, model, image_dir, detect_ground_truth_dir=None, classify_confident_ground_truth_dir=None, classify_uncertain_ground_truth_dir=None, detection_true_positive_threshold=0.8, detection_false_positive_threshold=0.5, classification_avg_score=0.5, target_labels=None, patch_size=450, patch_overlap=0, min_score=0.5):
"""Initialize pipeline evaluation.
Args:
model: Trained model for making predictions
+            image_dir (str): Directory containing images
detect_ground_truth_dir (str): Directory containing detection ground truth annotation CSV files
classify_confident_ground_truth_dir (str): Directory containing confident classification ground truth annotation CSV files
classify_uncertain_ground_truth_dir (str): Directory containing uncertain classification ground truth annotation CSV files
@@ -28,7 +29,10 @@ def __init__(self, model, detect_ground_truth_dir=None, classify_confident_groun
self.patch_size = patch_size
self.patch_overlap = patch_overlap
self.min_score = min_score

+        self.detection_ground_truth_dir = detect_ground_truth_dir
+        self.confident_classification_ground_truth_dir = classify_confident_ground_truth_dir
+        self.uncertain_classification_ground_truth_dir = classify_uncertain_ground_truth_dir
+        self.image_dir = image_dir
self.detection_annotations_df = gather_data(detect_ground_truth_dir)
self.confident_classification_annotations_df = gather_data(classify_confident_ground_truth_dir)
self.uncertain_classification_annotations_df = gather_data(classify_uncertain_ground_truth_dir)
@@ -56,9 +60,10 @@ def _format_targets(self, annotations_df):
return targets

def evaluate_detection(self):
+        full_image_paths = [self.image_dir + "/" + image_path for image_path in self.detection_annotations_df.image_path.tolist()]
        preds = predict(
-            model=self.model,
-            image_paths=self.detection_annotations_df.image_path.tolist(),
+            m=self.model,
+            image_paths=full_image_paths,
patch_size=self.patch_size,
patch_overlap=self.patch_overlap,
min_score=self.min_score
@@ -67,23 +72,28 @@ def evaluate_detection(self):

self.mAP.update(preds=preds, target=targets)

-        return self.mAP.compute()
+        results = {"mAP": self.mAP.compute()}
+
+        return results
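evaluate_detection now wraps its metric in an explicit {"mAP": ...} dictionary. For context, a minimal sketch of the torchmetrics update/compute cycle the method appears to build on (box, score, and label values are placeholders):

    import torch
    from torchmetrics.detection.mean_ap import MeanAveragePrecision

    metric = MeanAveragePrecision()
    preds = [{'boxes': torch.tensor([[10.0, 10.0, 50.0, 50.0]]),
              'scores': torch.tensor([0.9]),
              'labels': torch.tensor([0])}]
    target = [{'boxes': torch.tensor([[12.0, 12.0, 48.0, 48.0]]),
               'labels': torch.tensor([0])}]
    metric.update(preds=preds, target=target)
    results = {'mAP': metric.compute()}  # same shape as the return value above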

def confident_classification_accuracy(self):
        self.classification_accuracy.update(self.confident_classification_annotations_df)
-        return self.classification_accuracy.compute()
+        results = {"confident_classification_accuracy": self.classification_accuracy.compute()}
+        return results

def uncertain_classification_accuracy(self):
        self.classification_accuracy.update(self.uncertain_classification_annotations_df)
-        return self.classification_accuracy.compute()
+        results = {"uncertain_classification_accuracy": self.classification_accuracy.compute()}
+        return results

def target_classification_accuracy(self):
# Combine confident and uncertain classifications
        combined_annotations_df = pd.concat([self.confident_classification_annotations_df, self.uncertain_classification_annotations_df])
if self.target_classes is not None:
self.confident_classification_accuracy.update(combined_annotations_df, self.target_classes)
self.uncertain_classification_accuracy.update(combined_annotations_df, self.target_classes)
-            return self.confident_classification_accuracy.compute(), self.uncertain_classification_accuracy.compute()
+            results = {"target_classification_accuracy": {"confident_classification_accuracy": self.confident_classification_accuracy.compute(), "target_uncertain_classification_accuracy": self.uncertain_classification_accuracy.compute()}}
+            return results
else:
return None, None

@@ -92,9 +102,16 @@ def evaluate(self):
Evaluate pipeline performance for both detection and classification
"""
-        self.detection_results = self.evaluate_detection()
-        self.confident_classification_results = self.confident_classification_accuracy()
-        self.uncertain_classification_results = self.uncertain_classification_accuracy()
+        results = {}
+        detection_results = self.evaluate_detection()
+        confident_classification_results = self.confident_classification_accuracy()
+        uncertain_classification_results = self.uncertain_classification_accuracy()
+        if self.target_classes is not None:
+            target_classification_results = self.target_classification_accuracy()
+
+        results = {"detection": detection_results, "confident_classification": confident_classification_results, "uncertain_classification": uncertain_classification_results, "target_classification": target_classification_results}
+
+        return results
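Putting the pieces together, the dictionary evaluate() returns looks roughly like this (values are placeholders, not real metrics). Note that target_classification_results is only assigned inside the if branch, so evaluate() assumes target_classes is set:

    results = {
        'detection': {'mAP': {'map': 0.62}},  # MeanAveragePrecision output
        'confident_classification': {'confident_classification_accuracy': 0.91},
        'uncertain_classification': {'uncertain_classification_accuracy': 0.55},
        'target_classification': {'target_classification_accuracy': {
            'confident_classification_accuracy': 0.88,
            'target_uncertain_classification_accuracy': 0.47}},
    }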

def check_success(self):
"""Check if pipeline performance is satisfactory"""
40 changes: 28 additions & 12 deletions tests/conftest.py
@@ -40,40 +40,56 @@ def config(tmpdir_factory):
shutil.copy("tests/data/" + f, cfg.train.train_image_dir)

# Create sample bounding box annotations
-    data = {
-        'image_path': ['empty.jpg', 'birds.jpg', 'birds_val.jpg'],
+    train_data = {
+        'image_path': ['empty.jpg', 'birds.jpg',"birds.jpg"],
'xmin': [0, 200, 150],
        'ymin': [0, 300, 250],
'xmax': [0, 300, 250],
'ymax': [0, 400, 350],
-        'label': ['Bird', 'Bird', 'Bird'],
+        'label': ['Bird', 'Bird1', 'Bird2'],
'annotator': ['test_user', 'test_user', 'test_user']
}

-    # Create DataFrame
-    df = pd.DataFrame(data)
+    val_data = {
+        'image_path': ['birds_val.jpg', 'birds_val.jpg'],
+        'xmin': [150, 150],
+        'ymin': [250, 250],
+        'xmax': [250, 250],
+        'ymax': [350, 350],
+        'label': ['Bird1', 'Bird2'],
+        'annotator': ['test_user', 'test_user']
+    }
+
+    # Create DataFrames
+    train_df = pd.DataFrame(train_data)
+    val_df = pd.DataFrame(val_data)
+
+    # Save training data to CSV
+    train_csv_path = os.path.join(cfg.train.train_csv_folder, 'training_data.csv')
+    train_df.to_csv(train_csv_path, index=False)

-    # Save to CSV in the configured training directory
-    csv_path = os.path.join(cfg.train.train_csv_folder, 'training_data.csv')
-    df.to_csv(csv_path, index=False)
+    # Save validation data to CSV
+    val_csv_path = os.path.join(cfg.train.train_csv_folder, 'validation.csv')
+    val_df.to_csv(val_csv_path, index=False)
+
+    cfg.train.validation_csv_path = val_csv_path
cfg.train.fast_dev_run = True
cfg.checkpoint = "bird"
cfg.train.checkpoint_dir = tmpdir_factory.mktemp("checkpoints").strpath

# Create detection annotations
cfg.pipeline_evaluation.detect_ground_truth_dir = tmpdir_factory.mktemp("detection_annotations").strpath
csv_path = os.path.join(cfg.pipeline_evaluation.detect_ground_truth_dir, 'detection_annotations.csv')
-    df.to_csv(csv_path, index=False)
+    val_df.to_csv(csv_path, index=False)

# Create classification annotations
cfg.pipeline_evaluation.classify_confident_ground_truth_dir = tmpdir_factory.mktemp("confident_classification_annotations").strpath
csv_path = os.path.join(cfg.pipeline_evaluation.classify_confident_ground_truth_dir, 'confident_classification_annotations.csv')
-    df.to_csv(csv_path, index=False)
+    val_df.to_csv(csv_path, index=False)

cfg.pipeline_evaluation.classify_uncertain_ground_truth_dir = tmpdir_factory.mktemp("uncertain_classification_annotations").strpath
csv_path = os.path.join(cfg.pipeline_evaluation.classify_uncertain_ground_truth_dir, 'uncertain_classification_annotations.csv')
-    df.to_csv(csv_path, index=False)
+    val_df.to_csv(csv_path, index=False)

return cfg
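The fixture pattern above in miniature: a session-scoped temporary directory from tmpdir_factory, a CSV written into it, and the path wired into the config. A self-contained sketch (fixture and file names are illustrative):

    import os
    import pandas as pd
    import pytest

    @pytest.fixture(scope='session')
    def annotation_csv(tmpdir_factory):
        csv_dir = tmpdir_factory.mktemp('annotations').strpath
        csv_path = os.path.join(csv_dir, 'validation.csv')
        pd.DataFrame({
            'image_path': ['birds_val.jpg'],
            'xmin': [150], 'ymin': [250], 'xmax': [250], 'ymax': [350],
            'label': ['Bird1'], 'annotator': ['test_user'],
        }).to_csv(csv_path, index=False)
        return csv_path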

27 changes: 27 additions & 0 deletions tests/test_pipeline_evaluation.py
@@ -0,0 +1,27 @@
from src.pipeline_evaluation import PipelineEvaluation
from deepforest import main

def test_pipeline_evaluation(config):
m = main.deepforest()
pipeline_evaluation = PipelineEvaluation(model=m, **config.pipeline_evaluation)
performance = pipeline_evaluation.evaluate()

def test_check_success(config):
m = main.deepforest()
pipeline_evaluation = PipelineEvaluation(model=m, **config.pipeline_evaluation)
assert pipeline_evaluation.check_success() is False

def test_evaluate_detection(config):
m = main.deepforest()
pipeline_evaluation = PipelineEvaluation(model=m, **config.pipeline_evaluation)
detection_results = pipeline_evaluation.evaluate_detection()
assert detection_results["mAP"] is not None

def test_confident_classification_accuracy(config):
m = main.deepforest()
pipeline_evaluation = PipelineEvaluation(model=m, **config.pipeline_evaluation)
confident_classification_accuracy = pipeline_evaluation.confident_classification_accuracy()
assert confident_classification_accuracy is not None
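Each test builds its PipelineEvaluation from the shared config fixture, so the file can be run on its own; a minimal invocation through pytest's public API (the path assumes the repo layout above):

    import pytest

    # Run just the new evaluation tests, quietly:
    pytest.main(['-q', 'tests/test_pipeline_evaluation.py'])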


