diff --git a/workflows/train_mnist_wf/README.md b/workflows/train_mnist_wf/README.md index 4539013..1ed944f 100644 --- a/workflows/train_mnist_wf/README.md +++ b/workflows/train_mnist_wf/README.md @@ -1,14 +1,28 @@ -## Change the following variable value before deploying the workflow on your workspace +## There are a few prerequisites to fulfill before deploying the workflow -- Default value of `workspace_fqn` in main workflow function. -- Value of env variables `TFY_API_KEY` `TFY_HOST` in `task_config`. -- you can use virtual accout token as `TFY_API_KEY`, click [here](https://docs.truefoundry.com/docs/generating-truefoundry-api-keys#virtual-accounts) to learn about how to create virtual account. -- `host` value in `deploy.py` +### Creating the ml repo and giving the workspace access to that ml repo +- First, create an ml repo where you want to log the models. To learn how to create an ml repo, click [here](https://docs.truefoundry.com/docs/creating-a-ml-repo#/). +- Give the workspace where you will be deploying your workflow and the model access to the ml repo. To learn how to grant access, click [here](https://docs.truefoundry.com/docs/key-concepts#/grant-access-of-ml-repo-to-workspace). + +### Setting the value of default variables + +- Set the values of the env variables `TFY_API_KEY` and `TFY_HOST` in `task_config` in the `train-deploy-workflow.py` file. +- You can use a virtual account token as `TFY_API_KEY`; click [here](https://docs.truefoundry.com/docs/generating-truefoundry-api-keys#virtual-accounts) to learn how to create a virtual account. +- Set the `host` value in the `Port` field in the `deploy.py` file. ## Deploying the workflow You can deploy the workflow using the following command, make sure your truefoudry cli version is more thatn `4.0.0`. 
```bash -tfy deploy workflow --name --file --workspace-fqn -``` \ No newline at end of file +tfy deploy workflow --name --file train-deploy-workflow.py --workspace-fqn +``` +**Make sure the workflow helm chart is installed in the workspace in which you are deploying the workflow.** + +## Executing the workflow +The workflow takes the following arguments as input when it is executed. +`ml_repo`: The name of the ml repo where you want to deploy the model. The workspace should have access to this ml repo. +`workspace_fqn`: The fqn of the workspace where you want to deploy the model. +`epochs`: An array of integers that defines the number of epochs to train the model for; each entry is trained with the corresponding learning rate given in the `learning_rate` argument. The lengths of `epochs` and `learning_rate` should be the same. +`learning_rate`: An array of floats where each number is the learning rate you want your model to train with, corresponding to the entry of `epochs` at the same position. +`accuracy_threshold`: The threshold value; the workflow will deploy the model if its validation accuracy is greater than this threshold. 
\ No newline at end of file diff --git a/workflows/train_mnist_wf/train-deploy-workflow.py b/workflows/train_mnist_wf/train-deploy-workflow.py index 3c4e593..018f04c 100644 --- a/workflows/train_mnist_wf/train-deploy-workflow.py +++ b/workflows/train_mnist_wf/train-deploy-workflow.py @@ -15,6 +15,8 @@ resources=Resources(cpu_request=1.2, cpu_limit=1.2, memory_limit=3000, memory_request=3000, ephemeral_storage_limit=2000, ephemeral_storage_request=2000), service_account="default", env={ + "TF_CPP_MIN_LOG_LEVEL": "3", # suppress tensorflow warnings + "FLYTE_SDK_LOGGING_LEVEL": "40", "TFY_API_KEY": "", "TFY_HOST": "", } @@ -56,6 +58,7 @@ def train_model(epochs: int, learning_rate: float, data: Dict[str, np.array], ml model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"]) epochs = epochs + print(f"Started training the model for {epochs} epochs") history = model.fit(x_train, y_train, epochs=epochs, validation_data=(x_test, y_test)) # Evaluate the model @@ -122,11 +125,11 @@ def model_not_found(threshold: float) -> str: @workflow -def model_training_workflow(ml_repo: str, epochs: List[int] = [2, 3, 5], learning_rate: List[float] = [0.1, 0.001, 0.001], accuracy_threshold: float = 0.15, workspace_fqn: str = "") -> Union[str, None]: +def model_training_workflow(ml_repo: str, workspace_fqn: str, epochs: List[int] = [2, 3, 5], learning_rate: List[float] = [0.1, 0.001, 0.001], accuracy_threshold: float = 0.15) -> Union[str, None]: data = fetch_data() train_model_function = partial(train_model, data=data, ml_repo=ml_repo) fqns = map_task(train_model_function, concurrency=2)(epochs=epochs, learning_rate=learning_rate) - best_fqn, is_best_model_found = get_run_fqn_of_best_model(fqns=fqns, threshold=accuracy_threshold) - message = conditional("Deploy best model").if_(is_best_model_found == True).then(deploy_model(run_fqn=best_fqn, workspace_fqn=workspace_fqn)).else_().then(model_not_found(threshold=accuracy_threshold)) + model_version_fqn, 
does_model_pass_threshold_accuracy = get_run_fqn_of_best_model(fqns=fqns, threshold=accuracy_threshold) + message = conditional("Deploy model").if_(does_model_pass_threshold_accuracy == True).then(deploy_model(run_fqn=model_version_fqn, workspace_fqn=workspace_fqn)).else_().then(model_not_found(threshold=accuracy_threshold)) return message \ No newline at end of file