From 4197e66a460ac7d3bba5fed58fe38908badf82f1 Mon Sep 17 00:00:00 2001
From: "Vaibhav Jain (VJ)"
Date: Fri, 22 Nov 2024 13:06:20 -0800
Subject: [PATCH 1/5] Passing Default Storage for avoiding workspace network setting policies

---
 sdk/python/jobs/spark/setup_spark.sh                      | 6 +++++-
 .../spark/submit_spark_standalone_jobs_managed_vnet.ipynb | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/sdk/python/jobs/spark/setup_spark.sh b/sdk/python/jobs/spark/setup_spark.sh
index faea23617d9..95d38534dd4 100644
--- a/sdk/python/jobs/spark/setup_spark.sh
+++ b/sdk/python/jobs/spark/setup_spark.sh
@@ -51,10 +51,13 @@ then
     TIMESTAMP=`date +%m%d%H%M%S`
     AML_WORKSPACE_NAME=${AML_WORKSPACE_NAME}-vnet-$TIMESTAMP
     AZURE_STORAGE_ACCOUNT=${RESOURCE_GROUP}blobvnet
+    DEFAULT_STORAGE_ACCOUNT=${RESOURCE_GROUP}defaultvnet
     BLOB_CONTAINER_NAME="blobstoragevnetcontainer"
     GEN2_STORAGE_ACCOUNT_NAME=${RESOURCE_GROUP}gen2vnet
     ADLS_CONTAINER_NAME="gen2containervnet"
 
+    az storage account create -n $DEFAULT_STORAGE_ACCOUNT -g $RESOURCE_GROUP -l $LOCATION --sku Standard_LRS
+
     az storage account create -n $AZURE_STORAGE_ACCOUNT -g $RESOURCE_GROUP -l $LOCATION --sku Standard_LRS
     az storage container create -n $BLOB_CONTAINER_NAME --account-name $AZURE_STORAGE_ACCOUNT
 
@@ -80,7 +83,8 @@ then
     s/<ACCESS_KEY_SECRET_NAME>/$ACCESS_KEY_SECRET_NAME/g;
     s/<BLOB_CONTAINER_NAME>/$BLOB_CONTAINER_NAME/g;
     s/<GEN2_STORAGE_ACCOUNT_NAME>/$GEN2_STORAGE_ACCOUNT_NAME/g;
-    s/<ADLS_CONTAINER_NAME>/$ADLS_CONTAINER_NAME/g;" $2
+    s/<ADLS_CONTAINER_NAME>/$ADLS_CONTAINER_NAME/g
+    s/<DEFAULT_STORAGE_ACCOUNT>/$DEFAULT_STORAGE_ACCOUNT/g;" $2
     #
     #
 elif [[ "$2" == *"run_interactive_session_notebook"* ]]
diff --git a/sdk/python/jobs/spark/submit_spark_standalone_jobs_managed_vnet.ipynb b/sdk/python/jobs/spark/submit_spark_standalone_jobs_managed_vnet.ipynb
index cfa920dfae9..eace1dce406 100644
--- a/sdk/python/jobs/spark/submit_spark_standalone_jobs_managed_vnet.ipynb
+++ b/sdk/python/jobs/spark/submit_spark_standalone_jobs_managed_vnet.ipynb
@@ -114,6 +114,9 @@
     "region = \"\"\n",
     "# Enter Azure Blob storage account name for the outbound rule\n",
     "blob_storage_account = \"\"\n",
+    "default_storage_account = \"\"\n",
+    "\n",
+    "default_storage_resource_id = f\"/subscriptions/{subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Storage/storageAccounts/{default_storage_account}\"\n",
     "\n",
     "ws_mvnet = Workspace(\n",
     "    name=ws_name,\n",
@@ -121,6 +124,7 @@
     "    hbi_workspace=False,\n",
     "    public_network_access=\"Disabled\",  # Comment this out to enable Public Network Access\n",
     "    tags=dict(purpose=\"demo\"),\n",
+    "    storage_account=default_storage_resource_id,\n",
     ")\n",
     "\n",
     "ws_mvnet.managed_network = ManagedNetwork(\n",

From 5da8f70062bf98921c99c56b8eca7cc0fb1f5b5a Mon Sep 17 00:00:00 2001
From: "Vaibhav Jain (VJ)"
Date: Fri, 22 Nov 2024 14:56:21 -0800
Subject: [PATCH 2/5] trying quick fix

---
 sdk/python/jobs/spark/setup_spark.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/python/jobs/spark/setup_spark.sh b/sdk/python/jobs/spark/setup_spark.sh
index 95d38534dd4..50842a04b3f 100644
--- a/sdk/python/jobs/spark/setup_spark.sh
+++ b/sdk/python/jobs/spark/setup_spark.sh
@@ -1,6 +1,6 @@
 #
 SUBSCRIPTION_ID=$(az account show --query id -o tsv)
-LOCATION=$(az ml workspace show --query location -o tsv)
+LOCATION=eastus
 RESOURCE_GROUP=$(az group show --query name -o tsv)
 AML_WORKSPACE_NAME=$(az configure -l --query "[?name=='workspace'].value" -o tsv)
 API_VERSION="2022-05-01"
@@ -49,7 +49,7 @@ AML_USER_MANAGED_ID_OID=$(az identity show --resource-group $RESOURCE_GROUP -n $
 #
 if [[ "$2" == *"managed_vnet"* ]]
 then
     TIMESTAMP=`date +%m%d%H%M%S`
-    AML_WORKSPACE_NAME=${AML_WORKSPACE_NAME}-vnet-$TIMESTAMP
+    AML_WORKSPACE_NAME=${AML_WORKSPACE_NAME}-spark-vnet-$TIMESTAMP
     AZURE_STORAGE_ACCOUNT=${RESOURCE_GROUP}blobvnet
     DEFAULT_STORAGE_ACCOUNT=${RESOURCE_GROUP}defaultvnet
     BLOB_CONTAINER_NAME="blobstoragevnetcontainer"

From 323f4fd60ddb802e043823ab5319aa13111f5316 Mon Sep 17 00:00:00 2001
From: "Vaibhav Jain (VJ)"
Date: Fri, 22 Nov 2024 14:56:58 -0800
Subject: [PATCH 3/5] fix

---
 sdk/python/jobs/spark/setup_spark.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/python/jobs/spark/setup_spark.sh b/sdk/python/jobs/spark/setup_spark.sh
index 50842a04b3f..01aace7992c 100644
--- a/sdk/python/jobs/spark/setup_spark.sh
+++ b/sdk/python/jobs/spark/setup_spark.sh
@@ -1,6 +1,6 @@
 #
 SUBSCRIPTION_ID=$(az account show --query id -o tsv)
-LOCATION=eastus
+LOCATION="eastus"
 RESOURCE_GROUP=$(az group show --query name -o tsv)
 AML_WORKSPACE_NAME=$(az configure -l --query "[?name=='workspace'].value" -o tsv)
 API_VERSION="2022-05-01"

From 701483a03906a6cd2c992f9a2eb48cba0334d95f Mon Sep 17 00:00:00 2001
From: "Vaibhav Jain (VJ)"
Date: Fri, 22 Nov 2024 15:11:51 -0800
Subject: [PATCH 4/5] fix

---
 sdk/python/jobs/spark/setup_spark.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sdk/python/jobs/spark/setup_spark.sh b/sdk/python/jobs/spark/setup_spark.sh
index 01aace7992c..9b6b5d4505a 100644
--- a/sdk/python/jobs/spark/setup_spark.sh
+++ b/sdk/python/jobs/spark/setup_spark.sh
@@ -48,10 +48,10 @@ AML_USER_MANAGED_ID_OID=$(az identity show --resource-group $RESOURCE_GROUP -n $
 #
 if [[ "$2" == *"managed_vnet"* ]]
 then
-    TIMESTAMP=`date +%m%d%H%M%S`
+    TIMESTAMP=`date +%m%d%H%M`
-    AML_WORKSPACE_NAME=${AML_WORKSPACE_NAME}-spark-vnet-$TIMESTAMP
+    AML_WORKSPACE_NAME=${AML_WORKSPACE_NAME}-vnet-$TIMESTAMP
     AZURE_STORAGE_ACCOUNT=${RESOURCE_GROUP}blobvnet
-    DEFAULT_STORAGE_ACCOUNT=${RESOURCE_GROUP}defaultvnet
+    DEFAULT_STORAGE_ACCOUNT="sparkdefaultvnet"
     BLOB_CONTAINER_NAME="blobstoragevnetcontainer"
     GEN2_STORAGE_ACCOUNT_NAME=${RESOURCE_GROUP}gen2vnet
     ADLS_CONTAINER_NAME="gen2containervnet"

From 48a013aaae3e72815ad56b90287a633b03b7301d Mon Sep 17 00:00:00 2001
From: "Vaibhav Jain (VJ)"
Date: Fri, 13 Dec 2024 10:20:49 -0800
Subject: [PATCH 5/5] Spark examples to spark 3.4

---
 .../spark/serverless-spark-pipeline-default-identity.yml   | 2 +-
 .../spark/serverless-spark-pipeline-managed-identity.yml   | 2 +-
 cli/jobs/spark/serverless-spark-pipeline-user-identity.yml | 2 +-
 .../spark/serverless-spark-standalone-default-identity.yml | 2 +-
 .../spark/serverless-spark-standalone-managed-identity.yml | 2 +-
 .../spark/serverless-spark-standalone-user-identity.yml    | 2 +-
 cli/monitoring/advanced-model-monitoring.yaml              | 2 +-
 cli/monitoring/custom-monitoring.yaml                      | 2 +-
 cli/monitoring/generation-safety-quality-monitoring.yaml   | 2 +-
 cli/monitoring/model-monitoring-with-collected-data.yaml   | 2 +-
 cli/monitoring/out-of-box-monitoring.yaml                  | 2 +-
 sdk/python/jobs/spark/setup_spark.sh                       | 2 +-
 sdk/python/jobs/spark/submit_spark_pipeline_jobs.ipynb     | 6 +++---
 sdk/python/jobs/spark/submit_spark_standalone_jobs.ipynb   | 6 +++---
 .../spark/submit_spark_standalone_jobs_managed_vnet.ipynb  | 4 ++--
 15 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/cli/jobs/spark/serverless-spark-pipeline-default-identity.yml b/cli/jobs/spark/serverless-spark-pipeline-default-identity.yml
index 31b905d295f..579a5738cdb 100644
--- a/cli/jobs/spark/serverless-spark-pipeline-default-identity.yml
+++ b/cli/jobs/spark/serverless-spark-pipeline-default-identity.yml
@@ -22,4 +22,4 @@ jobs:
 
     resources:
       instance_type: standard_e8s_v3
-      runtime_version: "3.3"
+      runtime_version: "3.4"
diff --git a/cli/jobs/spark/serverless-spark-pipeline-managed-identity.yml b/cli/jobs/spark/serverless-spark-pipeline-managed-identity.yml
index 6822ea9f9fa..945263cca43 100644
--- a/cli/jobs/spark/serverless-spark-pipeline-managed-identity.yml
+++ b/cli/jobs/spark/serverless-spark-pipeline-managed-identity.yml
@@ -25,4 +25,4 @@ jobs:
 
     resources:
       instance_type: standard_e8s_v3
-      runtime_version: "3.3"
+      runtime_version: "3.4"
diff --git a/cli/jobs/spark/serverless-spark-pipeline-user-identity.yml b/cli/jobs/spark/serverless-spark-pipeline-user-identity.yml
index b5ec9d05ae6..ef8507a10a7 100644
--- a/cli/jobs/spark/serverless-spark-pipeline-user-identity.yml
+++ b/cli/jobs/spark/serverless-spark-pipeline-user-identity.yml
@@ -25,4 +25,4 @@ jobs:
 
     resources:
       instance_type: standard_e8s_v3
-      runtime_version: "3.3"
+      runtime_version: "3.4"
diff --git a/cli/jobs/spark/serverless-spark-standalone-default-identity.yml b/cli/jobs/spark/serverless-spark-standalone-default-identity.yml
index 49a21c4ed4d..31d39196c48 100644
--- a/cli/jobs/spark/serverless-spark-standalone-default-identity.yml
+++ b/cli/jobs/spark/serverless-spark-standalone-default-identity.yml
@@ -31,5 +31,5 @@ args: >-
 
 resources:
   instance_type: standard_e4s_v3
-  runtime_version: "3.3"
+  runtime_version: "3.4"
\ No newline at end of file
diff --git a/cli/jobs/spark/serverless-spark-standalone-managed-identity.yml b/cli/jobs/spark/serverless-spark-standalone-managed-identity.yml
index 1f4af1781fd..04da3bd18e9 100644
--- a/cli/jobs/spark/serverless-spark-standalone-managed-identity.yml
+++ b/cli/jobs/spark/serverless-spark-standalone-managed-identity.yml
@@ -34,5 +34,5 @@ identity:
 
 resources:
   instance_type: standard_e4s_v3
-  runtime_version: "3.3"
+  runtime_version: "3.4"
\ No newline at end of file
diff --git a/cli/jobs/spark/serverless-spark-standalone-user-identity.yml b/cli/jobs/spark/serverless-spark-standalone-user-identity.yml
index 86ce8695749..93482bb89a1 100644
--- a/cli/jobs/spark/serverless-spark-standalone-user-identity.yml
+++ b/cli/jobs/spark/serverless-spark-standalone-user-identity.yml
@@ -34,5 +34,5 @@ identity:
 
 resources:
   instance_type: standard_e4s_v3
-  runtime_version: "3.3"
+  runtime_version: "3.4"
\ No newline at end of file
diff --git a/cli/monitoring/advanced-model-monitoring.yaml b/cli/monitoring/advanced-model-monitoring.yaml
index 5f3c3031029..3b183eb3dbf 100644
--- a/cli/monitoring/advanced-model-monitoring.yaml
+++ b/cli/monitoring/advanced-model-monitoring.yaml
@@ -17,7 +17,7 @@ create_monitor:
 
   compute:
     instance_type: standard_e4s_v3
-    runtime_version: "3.3"
+    runtime_version: "3.4"
 
   monitoring_target:
     ml_task: classification
diff --git a/cli/monitoring/custom-monitoring.yaml b/cli/monitoring/custom-monitoring.yaml
index ecb17e91a7a..2a4ac50cf07 100644
--- a/cli/monitoring/custom-monitoring.yaml
+++ b/cli/monitoring/custom-monitoring.yaml
@@ -10,7 +10,7 @@ trigger:
 create_monitor:
   compute:
     instance_type: "standard_e8s_v3"
-    runtime_version: "3.3"
+    runtime_version: "3.4"
 
   monitoring_signals:
     customSignal:
diff --git a/cli/monitoring/generation-safety-quality-monitoring.yaml b/cli/monitoring/generation-safety-quality-monitoring.yaml
index b39c0bde809..3871f7dcbb9 100644
--- a/cli/monitoring/generation-safety-quality-monitoring.yaml
+++ b/cli/monitoring/generation-safety-quality-monitoring.yaml
@@ -22,7 +22,7 @@ trigger:
 create_monitor:
   compute:
     instance_type: "standard_e8s_v3"
"3.3" + runtime_version: "3.4" monitoring_target: ml_task: classification endpoint_deployment_id: fake_endpoint_id diff --git a/cli/monitoring/model-monitoring-with-collected-data.yaml b/cli/monitoring/model-monitoring-with-collected-data.yaml index b39dbc477ad..b73d7728e3e 100644 --- a/cli/monitoring/model-monitoring-with-collected-data.yaml +++ b/cli/monitoring/model-monitoring-with-collected-data.yaml @@ -16,7 +16,7 @@ trigger: create_monitor: compute: instance_type: standard_e4s_v3 - runtime_version: "3.3" + runtime_version: "3.4" monitoring_target: ml_task: classification endpoint_deployment_id: azureml:fraud-detection-endpoint:fraud-detection-deployment diff --git a/cli/monitoring/out-of-box-monitoring.yaml b/cli/monitoring/out-of-box-monitoring.yaml index 4745ad7f1ed..a30491ae8fd 100644 --- a/cli/monitoring/out-of-box-monitoring.yaml +++ b/cli/monitoring/out-of-box-monitoring.yaml @@ -17,7 +17,7 @@ create_monitor: compute: # specify a spark compute for monitoring job instance_type: standard_e4s_v3 - runtime_version: "3.3" + runtime_version: "3.4" monitoring_target: ml_task: classification # model task type: [classification, regression, question_answering] diff --git a/sdk/python/jobs/spark/setup_spark.sh b/sdk/python/jobs/spark/setup_spark.sh index 9b6b5d4505a..46e1c982a6b 100644 --- a/sdk/python/jobs/spark/setup_spark.sh +++ b/sdk/python/jobs/spark/setup_spark.sh @@ -156,7 +156,7 @@ else az storage fs create -n $GEN2_FILE_SYSTEM --account-name $GEN2_STORAGE_NAME az synapse workspace create --name $SYNAPSE_WORKSPACE_NAME --resource-group $RESOURCE_GROUP --storage-account $GEN2_STORAGE_NAME --file-system $GEN2_FILE_SYSTEM --sql-admin-login-user $SQL_ADMIN_LOGIN_USER --sql-admin-login-password $RANDOM_STRING --location $LOCATION az role assignment create --role "Storage Blob Data Owner" --assignee $AML_USER_MANAGED_ID_OID --scope /subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP/providers/Microsoft.Storage/storageAccounts/$GEN2_STORAGE_NAME/blobServices/default/containers/$GEN2_FILE_SYSTEM - az synapse spark pool create --name $SPARK_POOL_NAME --workspace-name $SYNAPSE_WORKSPACE_NAME --resource-group $RESOURCE_GROUP --spark-version 3.3 --node-count 3 --node-size Medium --min-node-count 3 --max-node-count 10 --enable-auto-scale true + az synapse spark pool create --name $SPARK_POOL_NAME --workspace-name $SYNAPSE_WORKSPACE_NAME --resource-group $RESOURCE_GROUP --spark-version 3.4 --node-count 3 --node-size Medium --min-node-count 3 --max-node-count 10 --enable-auto-scale true az synapse workspace firewall-rule create --name allowAll --workspace-name $SYNAPSE_WORKSPACE_NAME --resource-group $RESOURCE_GROUP --start-ip-address 0.0.0.0 --end-ip-address 255.255.255.255 # diff --git a/sdk/python/jobs/spark/submit_spark_pipeline_jobs.ipynb b/sdk/python/jobs/spark/submit_spark_pipeline_jobs.ipynb index 1a336efc40d..e637706d08d 100644 --- a/sdk/python/jobs/spark/submit_spark_pipeline_jobs.ipynb +++ b/sdk/python/jobs/spark/submit_spark_pipeline_jobs.ipynb @@ -342,7 +342,7 @@ " spark_step.identity = ManagedIdentityConfiguration()\n", " spark_step.resources = {\n", " \"instance_type\": \"Standard_E8S_V3\",\n", - " \"runtime_version\": \"3.3.0\",\n", + " \"runtime_version\": \"3.4.0\",\n", " }\n", "\n", "\n", @@ -422,7 +422,7 @@ " spark_step.identity = UserIdentityConfiguration()\n", " spark_step.resources = {\n", " \"instance_type\": \"Standard_E8S_V3\",\n", - " \"runtime_version\": \"3.3.0\",\n", + " \"runtime_version\": \"3.4.0\",\n", " }\n", "\n", "\n", @@ -501,7 +501,7 @@ " 
     "    spark_step.outputs.wrangled_data.mode = InputOutputModes.DIRECT\n",
     "    spark_step.resources = {\n",
     "        \"instance_type\": \"Standard_E8S_V3\",\n",
-    "        \"runtime_version\": \"3.3.0\",\n",
+    "        \"runtime_version\": \"3.4.0\",\n",
     "    }\n",
     "\n",
     "\n",
diff --git a/sdk/python/jobs/spark/submit_spark_standalone_jobs.ipynb b/sdk/python/jobs/spark/submit_spark_standalone_jobs.ipynb
index 245c2671222..88c2cc2dade 100644
--- a/sdk/python/jobs/spark/submit_spark_standalone_jobs.ipynb
+++ b/sdk/python/jobs/spark/submit_spark_standalone_jobs.ipynb
@@ -267,7 +267,7 @@
     "    executor_instances=2,\n",
     "    resources={\n",
     "        \"instance_type\": \"Standard_E8S_V3\",\n",
-    "        \"runtime_version\": \"3.3.0\",\n",
+    "        \"runtime_version\": \"3.4.0\",\n",
     "    },\n",
     "    inputs={\n",
     "        \"titanic_data\": Input(\n",
@@ -329,7 +329,7 @@
     "    executor_instances=2,\n",
     "    resources={\n",
     "        \"instance_type\": \"Standard_E8S_V3\",\n",
-    "        \"runtime_version\": \"3.3.0\",\n",
+    "        \"runtime_version\": \"3.4.0\",\n",
     "    },\n",
     "    inputs={\n",
     "        \"titanic_data\": Input(\n",
@@ -391,7 +391,7 @@
     "    executor_instances=2,\n",
     "    resources={\n",
     "        \"instance_type\": \"Standard_E8S_V3\",\n",
-    "        \"runtime_version\": \"3.3.0\",\n",
+    "        \"runtime_version\": \"3.4.0\",\n",
     "    },\n",
     "    inputs={\n",
     "        \"titanic_data\": Input(\n",
diff --git a/sdk/python/jobs/spark/submit_spark_standalone_jobs_managed_vnet.ipynb b/sdk/python/jobs/spark/submit_spark_standalone_jobs_managed_vnet.ipynb
index eace1dce406..96a756b5da2 100644
--- a/sdk/python/jobs/spark/submit_spark_standalone_jobs_managed_vnet.ipynb
+++ b/sdk/python/jobs/spark/submit_spark_standalone_jobs_managed_vnet.ipynb
@@ -535,7 +535,7 @@
     "    executor_instances=2,\n",
     "    resources={\n",
     "        \"instance_type\": \"Standard_E8S_V3\",\n",
-    "        \"runtime_version\": \"3.3.0\",\n",
+    "        \"runtime_version\": \"3.4.0\",\n",
     "    },\n",
     "    inputs={\n",
     "        \"titanic_data\": Input(\n",
@@ -769,7 +769,7 @@
     "    executor_instances=2,\n",
     "    resources={\n",
     "        \"instance_type\": \"Standard_E8S_V3\",\n",
-    "        \"runtime_version\": \"3.3.0\",\n",
+    "        \"runtime_version\": \"3.4.0\",\n",
     "    },\n",
     "    inputs={\n",
     "        \"titanic_data\": Input(\n",