Skip to content

Commit

Permalink
Merge pull request #34 from MLOps-essi-upc/md-deepchecks
Browse files Browse the repository at this point in the history
Deepchecks added
  • Loading branch information
mionaD-upc authored Dec 12, 2023
2 parents d4282b0 + 72f5f7a commit 3fec1ef
Show file tree
Hide file tree
Showing 5 changed files with 708 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,4 @@ The data has been sourced from the repository at Hugging Face (https://huggingface.co
url="https://github.com/AI-Lab-Makerere/ibean/"
}
```

8 changes: 7 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,10 @@ pydantic~=1.10.13
flask~=3.0.0
python-dotenv~=1.0.0
pandas~=2.0.3
mlflow~=2.8.0
mlflow~=2.8.0
deepchecks==0.17.5
pytorch-ignite==0.4.13
opencv-python==4.8.1.78
scikit-image==0.22.0


63 changes: 63 additions & 0 deletions src/data/deepchecks_validations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

import os
import shutil
from datasets import load_dataset
from deepchecks.vision import classification_dataset_from_directory
from deepchecks.vision.suites import train_test_validation

# loads dataset for particular split
def loadDataSet(name, split):
    """Thin wrapper around ``datasets.load_dataset`` for a single split."""
    dataset = load_dataset(name, split=split)
    return dataset

# return the path of the cache images for particular split
def pathToCopy(src, token):
    """Return the prefix of *src* up to (not including) the path
    component equal to *token*.

    If *token* does not occur in the path, the whole path is returned
    unchanged.  The result is rebuilt with a leading '/', so this assumes
    a POSIX-style absolute path (the huggingface cache path) -- TODO
    confirm behaviour on Windows.
    """
    parts = src.split(os.sep)
    try:
        # slice up to the token instead of popping one element at a time
        # (the original pop(0) loop was O(n^2) on long paths)
        parts = parts[:parts.index(token)]
    except ValueError:
        # token absent: keep every component, matching old behaviour
        pass
    return os.path.join('/', *parts)

# copy images to data/raw
def copyToRaw(src, out):
    """Recursively copy the tree at *src* into *out*, merging into any
    directories that already exist there."""
    shutil.copytree(src, out, dirs_exist_ok=True)

# prepare data for the analysis
def prepareData(dsName, split, out):
    """Download one *split* of dataset *dsName* and copy its cached
    images into the *out* directory."""
    dataset = loadDataSet(dsName, split)
    # derive the cache directory from the first image's file path
    cacheDir = pathToCopy(dataset[0]["image_file_path"], split)
    print(cacheDir)
    # copy the cached split images into the raw data folder
    copyToRaw(cacheDir, out)

# conduct the analysis
def runTrainTestValidation(src):
    """Run the deepchecks train/test validation suite over the image
    folders under *src* and save the report as ``output.html``."""
    train_ds, test_ds = classification_dataset_from_directory(
        root=src, object_type='VisionData', image_extension='jpg')

    validation_suite = train_test_validation()
    report = validation_suite.run(train_dataset=train_ds, test_dataset=test_ds)
    report.save_as_html('output.html')
    print("TEST DONE!")

# clear the data used for the analysis
def clearData(src):
    """Delete the directory tree at *src*."""
    shutil.rmtree(src)

def main():
    """Pull the train and test splits of the ``beans`` dataset, run the
    deepchecks train/test validation suite, then remove the copied data.

    The cleanup runs in a ``finally`` so the raw data copied into
    ``data/raw`` is removed even when the validation suite raises
    (the original code leaked the copied images on failure).
    """
    datasetName = "beans"
    outPath = os.path.join('data', 'raw')
    prepareData(datasetName, 'train', outPath)
    prepareData(datasetName, 'test', outPath)
    try:
        runTrainTestValidation(outPath)
    finally:
        # always clean up the copied raw data
        clearData(outPath)


if __name__ == '__main__':
    main()

635 changes: 635 additions & 0 deletions src/data/output.html

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/web/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def upload_file():
file.save(file_path)

#print("The received argument variable is: ", adress)
# MD change : (Pau instructions added)
# MD change : (Pau instructions added)
# v
command = 'curl -X POST -H "Content-Type: multipart/form-data" -H "Accept: application/json" -F "beans_img=@{}" host.docker.internal:443/make_prediction'.format(file_path.replace("\\", "\\\\"))

app.logger.info(command)
Expand Down

0 comments on commit 3fec1ef

Please sign in to comment.