First Commit

Fematich · Jun 18, 2018 · 4d5ea2a · 4d5ea2a
commit 4d5ea2a
Show file tree

Hide file tree

Showing 13 changed files with 729 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,106 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+*config.py*
+*secrets.py*
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
diff --git a/README.md b/README.md
@@ -0,0 +1,67 @@
+tf.Transform example for building digital twin
+====================
+
+This repository is designed to quickly get you started with Machine Learning projects on Google Cloud Platform using tf.Transform.
+This code repository is linked to [this blogpost](https://www.google.com).
+
+### Functionalities
+- preprocessing pipeline using tf.Transform (with Apache Beam) that runs on Cloud Dataflow or locally
+- model training (with Tensorflow) that runs locally or on ML Engine
+- ready-to-deploy saved models for ML Engine
+- starter code to use the saved model on ML Engine
+
+### Install dependencies
+**Note** You will need a Linux or Mac environment with Python 2.7.x to install the dependencies <sup>[1](#myfootnote1)</sup>.
+
+Install the following dependencies:
+ * Install [Cloud SDK](https://cloud.google.com/sdk/)
+ * Install [gcloud](https://cloud.google.com/sdk/gcloud/)
+ * ```pip install -r requirements.txt```
+
+# Getting started
+
+You need to complete the following parts to run the code:
+- add trainer/secrets.py with your `PROJECT_ID` and `BUCKET` variables
+- upload data to your buckets, you can upload data/test.csv to test this code
+
+## Preprocess
+
+You can run preprocess.py in the cloud using:
+```
+python preprocess.py --cloud
+      
+```
+
+To iterate/test your code, you can also run it locally on a sample of the dataset:
+```
+python preprocess.py
+```
+
+## Training Tensorflow model
+You can submit an ML Engine training job with:
+```
+gcloud ml-engine jobs submit training my_job \
+                --module-name trainer.task \
+                --staging-bucket gs://<staging_bucket> \
+                --package-path trainer
+```
+Testing it locally:
+```
+gcloud ml-engine local train --package-path trainer \
+                           --module-name trainer.task
+```
+
+## Deploy your trained model
+To deploy your model to ML Engine
+```
+gcloud ml-engine models create digitaltwin
+gcloud ml-engine versions create v1 --model=digitaltwin --origin=ORIGIN
+```
+To test the deployed model:
+```
+python predict.py
+```
+
+
+<a name="myfootnote1">1</a>: This code requires both Tensorflow and Apache Beam. Currently Tensorflow on Windows only supports Python 3.5.x,
+and Apache Beam doesn't support Python 3.x yet.
diff --git a/data/input_data.csv b/data/input_data.csv
@@ -0,0 +1,16 @@
+BatchId;ButterMass;ButterTemperature;SugarMass;SugarHumidity;FlourMass;FlourHumidity;HeatingTime;MixingSpeed;MixingTime
+1;121;20;200;0.22;50;0.23;50;Max Speed;200
+2;244;23;410;0.19;99;0.21;80;Medium Speed;450
+3;110;26;190;0.20;46;0.19;33;Low Speed;210
+4;121;20;200;0.22;50;0.23;50;Max Speed;200
+5;244;23;410;0.19;99;0.21;80;Medium Speed;450
+6;110;26;190;0.20;46;0.19;33;Low Speed;210
+7;121;20;200;0.22;50;0.23;50;Max Speed;200
+8;244;23;410;0.19;99;0.21;80;Medium Speed;450
+9;110;26;190;0.20;46;0.19;33;Low Speed;210
+10;121;20;200;0.22;50;0.23;50;Max Speed;200
+11;244;23;410;0.19;99;0.21;80;Medium Speed;450
+12;110;26;190;0.20;46;0.19;33;Low Speed;210
+13;121;20;200;0.22;50;0.23;50;Max Speed;200
+14;244;23;410;0.19;99;0.21;80;Medium Speed;450
+15;110;26;190;0.20;46;0.19;33;Low Speed;210
diff --git a/data/output_data.csv b/data/output_data.csv
@@ -0,0 +1,16 @@
+BatchId;TotalVolume;Density;Temperature;Humidity;Energy;Problems
+1;305;1.2;45;0.26;0.302;No
+2;603;1.4;47;0.24;0.599;Yes, some chunks remain
+3;301;1.1;42;0.24;0.312;No
+4;305;1.2;45;0.26;0.302;No
+5;603;1.4;47;0.24;0.599;Yes, some chunks remain
+6;301;1.1;42;0.24;0.312;No
+7;305;1.2;45;0.26;0.302;No
+8;603;1.4;47;0.24;0.599;Yes, some chunks remain
+9;301;1.1;42;0.24;0.312;No
+10;305;1.2;45;0.26;0.302;No
+11;603;1.4;47;0.24;0.599;Yes, some chunks remain
+12;301;1.1;42;0.24;0.312;No
+13;305;1.2;45;0.26;0.302;No
+14;603;1.4;47;0.24;0.599;Yes, some chunks remain
+15;301;1.1;42;0.24;0.312;No
diff --git a/predict.py b/predict.py
@@ -0,0 +1,57 @@
+from googleapiclient import discovery
+
+from trainer.config import PROJECT_ID
+
+
+def get_predictions(project, model, instances, version=None):
+    """Send json data to a deployed model for prediction.
+
+    Args:
+        project (str): GCP project where the ML Engine Model is deployed.
+        model (str): model name
+        instances ([Mapping[str: Any]]): Keys should be the names of Tensors
+            your deployed model expects as inputs. Values should be datatypes
+            convertible to Tensors, or (potentially nested) lists of datatypes
+            convertible to tensors.
+        version (str) version of the model to target
+
+    Returns:
+        Mapping[str: any]: dictionary of prediction results defined by the
+            model.
+
+    """
+    service = discovery.build('ml', 'v1')
+    name = 'projects/{}/models/{}'.format(project, model)
+
+    if version is not None:
+        name += '/versions/{}'.format(version)
+
+    response = service.projects().predict(
+        name=name,
+        body={'instances': instances}
+    ).execute()
+
+    if 'error' in response:
+        raise RuntimeError(response['error'])
+
+    return response['predictions']
+
+
+if __name__ == "__main__":
+    predictions = get_predictions(
+        project=PROJECT_ID,
+        model="digitaltwin",
+        instances=[
+            {
+                'ButterMass':120,
+                'ButterTemperature': 20,
+                'SugarMass': 200,
+                'SugarHumidity': 0.22,
+                'FlourMass': 50,
+                'FlourHumidity': 0.23,
+                'HeatingTime': 50,
+                'MixingSpeed': 'Max Speed',
+                'MixingTime': 200,
+            }]
+    )
+    print(predictions)