Skip to content

Commit

Permalink
Enable automation test for all feature store notebooks (#2657)
Browse files Browse the repository at this point in the history
* Enable automation test for all feature store notebooks

* Update feature store uai

* Update uai name

* Add test for 5th notebook

* Convert 5th notebook to py

* Fix the syntax error

* Update random generator

* Update redis name

* Change conda file used for test

* Update to use dynamic version

* Fix the format

* Use the same conda for all notebooks

* Check mounted folder

* Exclude 5th notebook

* Update feature_version to version

* Sample updates for AML Spark managed VNet GA. (#2656)

* Sample updates for AML Spark managed VNet GA.

* Updated notebook title. Triggering build to test.

* Updated workspace name to include timestamp. Triggering build to test.

* Shortened VNet workspace name to meet validation.

* README.md clean-up and fixes for AML Spark samples.

* README.md clean-up and fixes for AML Spark samples.

* chore: Remove 'Send IcM on Failure' step from tutorial workflows (#2670)

* chore: Remove 'Send IcM on Failure' step that hasn't been in use for a while

* chore: Run tutorials/readme.py

* chore: Remove unused local 'generate-icm' action

* update cron schedules to avoid endpoint deletion from automated resource cleanup (#2672)

* change starting hour for workflow

* update cron for text mc mlflow

* use python 3.8.* (#2678)

* use python 3.8.18

* Update cleanup.yml

* update usepython v4

* Update workstation_env.yml (#2680)

* Examples for online inference for LLaVA (#2671)

* Rough version of ITT notebook.

* Working version of the notebook.

* Minor improvements.

* Rough version of the CLI demo.

* Reformat with black.

* Remove mode parameter in call to prepare_data.py.

* Make code cells easier to read.

* Minor readability improvement.

* Format with black.

* Minor fixes and reformatting.

* Reformat with black.

* Update workstation_env.yml (#2681)

delete extraneous space

* update vision FT examples corresponding to recent component update (#2650)

* updating changes with ME component

* updating MMOD as well

* update MMOD cli

* separate section for model evaluation component

* remove ds ort true for classification

* black check

* Remove v1 references (#2675)

* Remove v1 samples from v2 repo

* Remove v1 from table of contents

* Remove v1 test files

* Remove v1 test files

* Remove v1 folder from README

* Remove v1 folder

* Remove v1 tests

* Delete v1 files

* Restore custom-container directory

* Restore custom-container/triton directory

* Update the notebook format

---------

Co-authored-by: Yogi Pandey <[email protected]>
Co-authored-by: kdestin <[email protected]>
Co-authored-by: Rehaan Bhimani <[email protected]>
Co-authored-by: Ada <[email protected]>
Co-authored-by: Sheri Gilley <[email protected]>
Co-authored-by: rdondera-microsoft <[email protected]>
Co-authored-by: shubhamiit <[email protected]>
Co-authored-by: Diondra <[email protected]>
  • Loading branch information
9 people authored Sep 27, 2023
1 parent 55dffe9 commit b19a056
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 52 deletions.
36 changes: 23 additions & 13 deletions sdk/python/featurestore_sample/featurestore_sdk_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,35 @@

spark = SparkSession.builder.appName("AccessData").getOrCreate()

import os

for path, subdirs, files in os.walk("./"):
for name in files:
print(os.path.join(path, name))

print("=======Test Notebook 1============")
with open(
"notebooks/sdk_only/1. Develop a feature set and register with managed feature store.py"
) as f:
exec(f.read())

## Enable test for notebook 1 first
# print("=======Test Notebook 2============")
# with open(
# "notebooks/sdk_only/2. Enable materialization and backfill feature data.py"
# ) as f:
# exec(f.read())
print("=======Test Notebook 2============")
with open(
"notebooks/sdk_only/2. Enable materialization and backfill feature data.py"
) as f:
exec(f.read())

# print("=======Test Notebook 3============")
# with open("notebooks/sdk_only/3. Experiment and train models using features.py") as f:
# exec(f.read())
print("=======Test Notebook 3============")
with open("notebooks/sdk_only/3. Experiment and train models using features.py") as f:
exec(f.read())

print("=======Test Notebook 4============")
with open(
"notebooks/sdk_only/4. Enable recurrent materialization and run batch inference.py"
) as f:
exec(f.read())

# print("=======Test Notebook 3============")
# with open(
# "notebooks/sdk_only/4. Enable recurrent materialization and run batch inference.py"
# ) as f:
# # exclude 5th notebook for now
# print("=======Test Notebook 5============")
# with open("notebooks/sdk_only/5. Enable online store and run online inference.py") as f:
# exec(f.read())
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,8 @@
"featurestore_name = \"my-featurestore\"\n",
"featurestore_location = \"eastus\"\n",
"featurestore_subscription_id = os.environ[\"AZUREML_ARM_SUBSCRIPTION\"]\n",
"featurestore_resource_group_name = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]"
"featurestore_resource_group_name = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]\n",
"version = \"<VERSION>\""
]
},
{
Expand Down Expand Up @@ -698,7 +699,7 @@
"\n",
"account_entity_config = FeatureStoreEntity(\n",
" name=\"account\",\n",
" version=\"1\",\n",
" version=version,\n",
" index_columns=[DataColumn(name=\"accountID\", type=DataColumnType.STRING)],\n",
" stage=\"Development\",\n",
" description=\"This entity represents user account index key accountID.\",\n",
Expand Down Expand Up @@ -750,9 +751,9 @@
"\n",
"transaction_fset_config = FeatureSet(\n",
" name=\"transactions\",\n",
" version=\"1\",\n",
" version=version,\n",
" description=\"7-day and 3-day rolling aggregation of transactions featureset\",\n",
" entities=[\"azureml:account:1\"],\n",
" entities=[f\"azureml:account:{version}\"],\n",
" stage=\"Development\",\n",
" specification=FeatureSetSpecification(path=transactions_featureset_spec_folder),\n",
" tags={\"data_type\": \"nonPII\"},\n",
Expand Down Expand Up @@ -878,7 +879,7 @@
"outputs": [],
"source": [
"# look up the featureset by providing name and version\n",
"transactions_featureset = featurestore.feature_sets.get(\"transactions\", \"1\")\n",
"transactions_featureset = featurestore.feature_sets.get(\"transactions\", version)\n",
"# list its features\n",
"transactions_featureset.features"
]
Expand Down Expand Up @@ -952,8 +953,8 @@
"\n",
"# you can also specify features in string form: featureset:version:feature\n",
"more_features = [\n",
" \"transactions:1:transaction_3d_count\",\n",
" \"transactions:1:transaction_amount_3d_avg\",\n",
" f\"transactions:{version}:transaction_3d_count\",\n",
" f\"transactions:{version}:transaction_amount_3d_avg\",\n",
"]\n",
"\n",
"more_features = featurestore.resolve_feature_uri(more_features)\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@
"project_ws_sub_id = os.environ[\"AZUREML_ARM_SUBSCRIPTION\"]\n",
"project_ws_rg = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]\n",
"project_ws_name = os.environ[\"AZUREML_ARM_WORKSPACE_NAME\"]\n",
"\n",
"version = \"<VERSION>\"\n",
"# connect to the project workspace\n",
"ws_client = MLClient(\n",
" AzureMLOnBehalfOfCredential(), project_ws_sub_id, project_ws_rg, project_ws_name\n",
Expand Down Expand Up @@ -562,7 +562,7 @@
"# User assigned managed identity values. Optionally you may change the values.\n",
"uai_subscription_id = os.environ[\"AZUREML_ARM_SUBSCRIPTION\"]\n",
"uai_resource_group_name = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]\n",
"uai_name = \"fstoreuai\"\n",
"uai_name = \"<FEATURE_STORE_UAI_NAME>\"\n",
"uai_location = ws_location"
]
},
Expand Down Expand Up @@ -754,7 +754,10 @@
"transient": {
"deleting": false
}
}
},
"tags": [
"active-ipynb"
]
},
"outputs": [],
"source": [
Expand Down Expand Up @@ -868,7 +871,9 @@
" MaterializationComputeResource,\n",
")\n",
"\n",
"transactions_fset_config = fs_client._featuresets.get(name=\"transactions\", version=\"1\")\n",
"transactions_fset_config = fs_client._featuresets.get(\n",
" name=\"transactions\", version=version\n",
")\n",
"\n",
"transactions_fset_config.materialization_settings = MaterializationSettings(\n",
" offline_enabled=True,\n",
Expand Down Expand Up @@ -968,7 +973,7 @@
"\n",
"poller = fs_client.feature_sets.begin_backfill(\n",
" name=\"transactions\",\n",
" version=\"1\",\n",
" version=version,\n",
" feature_window_start_time=st,\n",
" feature_window_end_time=ed,\n",
")\n",
Expand All @@ -978,7 +983,11 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": [
"active-ipynb"
]
},
"outputs": [],
"source": [
"# get the job URL, and stream the job logs\n",
Expand Down Expand Up @@ -1019,7 +1028,7 @@
"outputs": [],
"source": [
"# look up the featureset by providing name and version\n",
"transactions_featureset = featurestore.feature_sets.get(\"transactions\", \"1\")\n",
"transactions_featureset = featurestore.feature_sets.get(\"transactions\", version)\n",
"display(transactions_featureset.to_spark_dataframe().head(5))"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@
"project_ws_sub_id = os.environ[\"AZUREML_ARM_SUBSCRIPTION\"]\n",
"project_ws_rg = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]\n",
"project_ws_name = os.environ[\"AZUREML_ARM_WORKSPACE_NAME\"]\n",
"version = \"<VERSION>\"\n",
"\n",
"# connect to the project workspace\n",
"ws_client = MLClient(\n",
Expand Down Expand Up @@ -612,7 +613,7 @@
"outputs": [],
"source": [
"# get the registered transactions feature set at the configured version\n",
"transactions_featureset = featurestore.feature_sets.get(\"transactions\", \"1\")\n",
"transactions_featureset = featurestore.feature_sets.get(\"transactions\", version)\n",
"# Notice that account feature set spec is in your local dev environment (this notebook): not registered with feature store yet\n",
"features = [\n",
" accounts_featureset_spec.get_feature(\"accountAge\"),\n",
Expand Down Expand Up @@ -717,9 +718,9 @@
"\n",
"accounts_fset_config = FeatureSet(\n",
" name=\"accounts\",\n",
" version=\"1\",\n",
" version=version,\n",
" description=\"accounts featureset\",\n",
" entities=[\"azureml:account:1\"],\n",
" entities=[f\"azureml:account:{version}\"],\n",
" stage=\"Development\",\n",
" specification=FeatureSetSpecification(path=accounts_featureset_spec_folder),\n",
" tags={\"data_type\": \"nonPII\"},\n",
Expand Down Expand Up @@ -764,7 +765,7 @@
"outputs": [],
"source": [
"# look up the featureset by providing name and version\n",
"accounts_featureset = featurestore.feature_sets.get(\"accounts\", \"1\")\n",
"accounts_featureset = featurestore.feature_sets.get(\"accounts\", version)\n",
"# get access to the feature data\n",
"accounts_feature_df = accounts_featureset.to_spark_dataframe()\n",
"display(accounts_feature_df.head(5))\n",
Expand Down Expand Up @@ -853,7 +854,7 @@
" print(fs)\n",
"\n",
"# See properties of the transactions featureset including list of features\n",
"featurestore.feature_sets.get(name=\"transactions\", version=\"1\").features"
"featurestore.feature_sets.get(name=\"transactions\", version=version).features"
]
},
{
Expand Down Expand Up @@ -914,8 +915,8 @@
"\n",
"# you can also specify features in string form: featureset:version:feature\n",
"more_features = [\n",
" \"accounts:1:numPaymentRejects1dPerUser\",\n",
" \"transactions:1:transaction_amount_7d_avg\",\n",
" f\"accounts:{version}:numPaymentRejects1dPerUser\",\n",
" f\"transactions:{version}:transaction_amount_7d_avg\",\n",
"]\n",
"\n",
"more_features = featurestore.resolve_feature_uri(more_features)\n",
Expand Down Expand Up @@ -1027,7 +1028,10 @@
"transient": {
"deleting": false
}
}
},
"tags": [
"active-ipynb"
]
},
"outputs": [],
"source": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@
"project_ws_sub_id = os.environ[\"AZUREML_ARM_SUBSCRIPTION\"]\n",
"project_ws_rg = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]\n",
"project_ws_name = os.environ[\"AZUREML_ARM_WORKSPACE_NAME\"]\n",
"version = \"<VERSION>\"\n",
"\n",
"# connect to the project workspace\n",
"ws_client = MLClient(\n",
Expand Down Expand Up @@ -374,7 +375,9 @@
"from datetime import datetime\n",
"from azure.ai.ml.entities import RecurrenceTrigger\n",
"\n",
"transactions_fset_config = fs_client.feature_sets.get(name=\"transactions\", version=\"1\")\n",
"transactions_fset_config = fs_client.feature_sets.get(\n",
" name=\"transactions\", version=version\n",
")\n",
"\n",
"# create a schedule that runs the materialization job every 3 hours\n",
"transactions_fset_config.materialization_settings.schedule = RecurrenceTrigger(\n",
Expand Down Expand Up @@ -483,7 +486,10 @@
"transient": {
"deleting": false
}
}
},
"tags": [
"active-ipynb"
]
},
"outputs": [],
"source": [
Expand Down Expand Up @@ -539,7 +545,10 @@
"transient": {
"deleting": false
}
}
},
"tags": [
"active-ipynb"
]
},
"outputs": [],
"source": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
"\n",
"# Please update the dir to ./Users/{your-alias} (or any custom directory you uploaded the samples to).\n",
"# You can find the name from the directory structure in the left navigation panel.\n",
"root_dir = \"./Users/<your user alias>/featurestore_sample\"\n",
"root_dir = \"./Users/<your_user_alias>/featurestore_sample\"\n",
"\n",
"if os.path.isdir(root_dir):\n",
" print(\"The folder exists.\")\n",
Expand Down Expand Up @@ -171,6 +171,7 @@
"project_ws_sub_id = os.environ[\"AZUREML_ARM_SUBSCRIPTION\"]\n",
"project_ws_rg = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]\n",
"project_ws_name = os.environ[\"AZUREML_ARM_WORKSPACE_NAME\"]\n",
"version = \"<VERSION>\"\n",
"\n",
"# Connect to the project workspace\n",
"ws_client = MLClient(\n",
Expand Down Expand Up @@ -199,7 +200,7 @@
"from azure.ai.ml.identity import AzureMLOnBehalfOfCredential\n",
"\n",
"# Feature store\n",
"featurestore_name = \"featurestore\" # use the same name from part #1 of the tutorial\n",
"featurestore_name = \"my-featurestore\" # use the same name from part #1 of the tutorial\n",
"featurestore_subscription_id = os.environ[\"AZUREML_ARM_SUBSCRIPTION\"]\n",
"featurestore_resource_group_name = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]\n",
"\n",
Expand Down Expand Up @@ -282,7 +283,7 @@
"\n",
"redis_subscription_id = os.environ[\"AZUREML_ARM_SUBSCRIPTION\"]\n",
"redis_resource_group_name = os.environ[\"AZUREML_ARM_RESOURCEGROUP\"]\n",
"redis_name = \"redis1\"\n",
"redis_name = \"<REDIS_NAME>\"\n",
"redis_location = ws_location"
]
},
Expand Down Expand Up @@ -598,7 +599,7 @@
"\n",
"# Turn on both offline and online materialization on the \"accounts\" featureset.\n",
"\n",
"accounts_fset_config = fs_client._featuresets.get(name=\"accounts\", version=\"1\")\n",
"accounts_fset_config = fs_client._featuresets.get(name=\"accounts\", version=version)\n",
"\n",
"accounts_fset_config.materialization_settings = MaterializationSettings(\n",
" offline_enabled=True,\n",
Expand Down Expand Up @@ -651,7 +652,7 @@
"\n",
"poller = fs_client.feature_sets.begin_backfill(\n",
" name=\"accounts\",\n",
" version=\"1\",\n",
" version=version,\n",
" feature_window_start_time=st,\n",
" feature_window_end_time=ed,\n",
")\n",
Expand All @@ -676,7 +677,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"name": "track-accounts-backfill"
"name": "track-accounts-backfill",
"tags": [
"active-ipynb"
]
},
"outputs": [],
"source": [
Expand Down Expand Up @@ -720,7 +724,9 @@
"source": [
"# Enable materialization to online store for the \"transactions\" feature set.\n",
"\n",
"transactions_fset_config = fs_client._featuresets.get(name=\"transactions\", version=\"1\")\n",
"transactions_fset_config = fs_client._featuresets.get(\n",
" name=\"transactions\", version=version\n",
")\n",
"transactions_fset_config.materialization_settings.online_enabled = True\n",
"\n",
"fs_poller = fs_client.feature_sets.begin_create_or_update(transactions_fset_config)\n",
Expand Down Expand Up @@ -765,7 +771,7 @@
"\n",
"poller = fs_client.feature_sets.begin_backfill(\n",
" name=\"transactions\",\n",
" version=\"1\",\n",
" version=version,\n",
" feature_window_start_time=st,\n",
" feature_window_end_time=ed,\n",
")\n",
Expand Down Expand Up @@ -795,7 +801,10 @@
"transient": {
"deleting": false
}
}
},
"tags": [
"active-ipynb"
]
},
"outputs": [],
"source": [
Expand Down
Loading

0 comments on commit b19a056

Please sign in to comment.