diff --git a/README.md b/README.md
index bd87b0a..d8522a3 100644
--- a/README.md
+++ b/README.md
@@ -134,3 +134,4 @@ The data has been sourced from repository at huggingface (https://huggingface.co
 url="https://github.com/AI-Lab-Makerere/ibean/"
 }
 ```
+
diff --git a/requirements.txt b/requirements.txt
index aa91466..80aefc0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,4 +19,10 @@ pydantic~=1.10.13
 flask~=3.0.0
 python-dotenv~=1.0.0
 pandas~=2.0.3
-mlflow~=2.8.0
\ No newline at end of file
+mlflow~=2.8.0
+deepchecks==0.17.5
+pytorch-ignite==0.4.13
+opencv-python==4.8.1.78
+scikit-image==0.22.0
+
+
diff --git a/src/data/deepchecks_validations.py b/src/data/deepchecks_validations.py
new file mode 100644
index 0000000..3e736cc
--- /dev/null
+++ b/src/data/deepchecks_validations.py
@@ -0,0 +1,63 @@
+
+import os
+import shutil
+from datasets import load_dataset
+from deepchecks.vision import classification_dataset_from_directory
+from deepchecks.vision.suites import train_test_validation
+
+# loads dataset for particular split
+def loadDataSet(name, split):
+    return load_dataset(name, split=split)
+
+# return the path of the cache images for particular split
+def pathToCopy(src, token):
+    pathList = src.split(os.sep)
+    result = []
+    while len(pathList) > 0:
+        if pathList[0] == token:
+            break
+        result.append(pathList.pop(0))
+
+    return os.path.join('/', *result)
+
+# copy images to data/raw
+def copyToRaw(src, out):
+    shutil.copytree(src, out, dirs_exist_ok=True)
+
+# prepare data for the analysis
+def prepareData(dsName, split, out):
+    # load test data
+    ds = loadDataSet(dsName, split)
+    # get the path of the cashed images
+    dsPath = pathToCopy(ds[0]["image_file_path"], split)
+    print(dsPath)
+    # copy test images from cache to raw folder
+    copyToRaw(dsPath, out)
+
+# conduct the analysis
+def runTrainTestValidation(src):
+    train, test = classification_dataset_from_directory(
+        root=src,
+        object_type='VisionData', image_extension='jpg')
+
+    suite = train_test_validation()
+    result = suite.run(train_dataset=train, test_dataset=test)
+    result.save_as_html('output.html')
+    print("TEST DONE!")
+
+# clear the data used for the analysis
+def clearData(src):
+    shutil.rmtree(src)
+
+def main():
+    datasetName ="beans"
+    outPath = os.path.join('data', 'raw')
+    prepareData(datasetName, 'train', outPath)
+    prepareData(datasetName, 'test', outPath)
+    runTrainTestValidation(outPath)
+    clearData(outPath)
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/src/data/output.html b/src/data/output.html
new file mode 100644
index 0000000..c3d0367
--- /dev/null
+++ b/src/data/output.html
@@ -0,0 +1,635 @@
+
+
+
+
+
+Train Test Validation Suite
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/web/app.py b/src/web/app.py
index d70b7e8..3933edb 100644
--- a/src/web/app.py
+++ b/src/web/app.py
@@ -54,7 +54,8 @@ def upload_file():
         file.save(file_path)
 
         #print("The received argument variable is: ", adress)
 
-        # MD change : (Pau instructions added)
+        # MD change : (Pau instructions added)
+        # v
         command = 'curl -X POST -H "Content-Type: multipart/form-data" -H "Accept: application/json" -F "beans_img=@{}" host.docker.internal:443/make_prediction'.format(file_path.replace("\\", "\\\\"))
         app.logger.info(command)