-
Notifications
You must be signed in to change notification settings - Fork 32
/
data_preprocessing.py
45 lines (37 loc) · 1.39 KB
/
data_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import neptune
from sklearn.datasets import fetch_lfw_people
from utils import *
# Fetch the Labeled Faces in the Wild (people) dataset through scikit-learn.
# min_faces_per_person / resize are passed straight to the loader: they limit
# the people kept by picture count and set the face resize ratio.
dataset = fetch_lfw_people(
    resize=0.4,
    min_faces_per_person=70,
)
# (Neptune) Start a new run for this preprocessing step.
# The requirements file is handed to Neptune as the run's dependencies, and
# monitoring data is kept in a namespace specific to preprocessing.
run = neptune.init_run(
    dependencies="requirements.txt",
    monitoring_namespace="monitoring/preprocessing",
)
# Summarize the dataset: the class labels (stored as the string form of a
# list), the number of classes, and the three leading dimensions of the
# image array.
dataset_config = dict(
    target_names=str(dataset.target_names.tolist()),
    n_classes=dataset.target_names.shape[0],
    n_samples=dataset.images.shape[0],
    height=dataset.images.shape[1],
    width=dataset.images.shape[2],
)

# (Neptune) Use "preprocessing" as the base namespace inside the run;
# all preprocessing metadata is logged beneath it.
preprocessing_handler = run["preprocessing"]

# (Neptune) Record the dataset summary under "dataset/config"
preprocessing_handler["dataset/config"] = dataset_config
# Build the preprocessing helper from the raw dataset and its summary.
# The image dimensions are passed as a single (height, width) pair.
image_dims = (dataset_config["height"], dataset_config["width"])
dataset_transform = Preprocessing(
    dataset,
    dataset_config["n_samples"],
    dataset_config["target_names"],
    dataset_config["n_classes"],
    image_dims,
)

# Run the preprocessing steps in order. The values returned by the first two
# calls are kept so they can be uploaded to the run afterwards.
path_to_scaler = dataset_transform.scale_data()
path_to_features = dataset_transform.create_and_save_features(
    data_filename="features",
)
dataset_transform.describe()
# (Neptune) Upload the scaler and features files, each to its own field
# under the preprocessing namespace (scaler first, then features).
for field_path, local_file in (
    ("dataset/scaler", path_to_scaler),
    ("dataset/features", path_to_features),
):
    preprocessing_handler[field_path].upload(local_file)