diff --git a/introduction_to_applying_machine_learning/breast_cancer_prediction/Breast Cancer Prediction.ipynb b/introduction_to_applying_machine_learning/breast_cancer_prediction/Breast Cancer Prediction.ipynb index 2c6325a7ca..5715849f71 100644 --- a/introduction_to_applying_machine_learning/breast_cancer_prediction/Breast Cancer Prediction.ipynb +++ b/introduction_to_applying_machine_learning/breast_cancer_prediction/Breast Cancer Prediction.ipynb @@ -298,12 +298,8 @@ "outputs": [], "source": [ "# See 'Algorithms Provided by Amazon SageMaker: Common Parameters' in the SageMaker documentation for an explanation of these values.\n", - "containers = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/linear-learner:latest',\n", - " 'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest',\n", - " 'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:latest',\n", - " 'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:latest',\n", - " 'ap-northeast-1': '351501993468.dkr.ecr.ap-northeast-1.amazonaws.com/linear-learner:latest',\n", - " 'ap-northeast-2': '835164637446.dkr.ecr.ap-northeast-2.amazonaws.com/linear-learner:latest'}" + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "container = get_image_uri(boto3.Session().region_name, 'linear-learner')" ] }, { @@ -327,7 +323,7 @@ " \"RoleArn\": role,\n", " \"TrainingJobName\": linear_job,\n", " \"AlgorithmSpecification\": {\n", - " \"TrainingImage\": containers[boto3.Session().region_name],\n", + " \"TrainingImage\": container,\n", " \"TrainingInputMode\": \"File\"\n", " },\n", " \"ResourceConfig\": {\n", @@ -439,7 +435,7 @@ "outputs": [], "source": [ "linear_hosting_container = {\n", - " 'Image': containers[boto3.Session().region_name],\n", + " 'Image': container,\n", " 'ModelDataUrl': sm.describe_training_job(TrainingJobName=linear_job)['ModelArtifacts']['S3ModelArtifacts']\n", "}\n", "\n", diff --git a/introduction_to_applying_machine_learning/ensemble_modeling/EnsembleLearnerCensusIncome.ipynb b/introduction_to_applying_machine_learning/ensemble_modeling/EnsembleLearnerCensusIncome.ipynb index ac6c967148..7211a9dc31 100644 --- a/introduction_to_applying_machine_learning/ensemble_modeling/EnsembleLearnerCensusIncome.ipynb +++ b/introduction_to_applying_machine_learning/ensemble_modeling/EnsembleLearnerCensusIncome.ipynb @@ -60,6 +60,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "isConfigCell": true }, "outputs": [], @@ -88,7 +89,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import numpy as np # For matrix operations and numerical processing\n", @@ -128,7 +131,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "## read the data\n", @@ -161,7 +166,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# set display options\n", @@ -180,7 +187,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "## Combine the two datasets to convert the categorical values to binary indicators\n", @@ -275,7 +284,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Split the data randomly as 80% for training and remaining 20% and save them locally\n", @@ -297,7 +308,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "\n", @@ -318,21 +331,21 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "xgboost_containers = {'us-west-2' : '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',\n", - " 'us-east-1' : '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',\n", - " 'us-east-2' : '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',\n", - " 'eu-west-1' : '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',\n", - " 'ap-northeast-1': '501404015308.dkr.ecr.ap-northeast-1.amazonaws.com/xgboost:latest',\n", - " 'ap-northeast-2': '306986355934.dkr.ecr.ap-northeast-2.amazonaws.com/xgboost:latest'}" + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "container = get_image_uri(boto3.Session().region_name, 'xgboost')" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import boto3\n", @@ -344,7 +357,7 @@ "create_training_params = \\\n", "{\n", " \"AlgorithmSpecification\": {\n", - " \"TrainingImage\": xgboost_containers[boto3.Session().region_name],\n", + " \"TrainingImage\": container,\n", " \"TrainingInputMode\": \"File\"\n", " },\n", " \"RoleArn\": role,\n", @@ -410,7 +423,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -457,12 +472,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "model_name=job_name + '-mdl'\n", "xgboost_hosting_container = {\n", - " 'Image': xgboost_containers[boto3.Session().region_name],\n", + " 'Image': container,\n", " 'ModelDataUrl': sm.describe_training_job(TrainingJobName=job_name)['ModelArtifacts']['S3ModelArtifacts'],\n", " 'Environment': {'this': 'is'}\n", "}\n", @@ -476,7 +493,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print(create_model_response['ModelArn'])\n", @@ -496,7 +515,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from time import gmtime, strftime\n", @@ -526,7 +547,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -565,7 +588,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "runtime= boto3.client('runtime.sagemaker')\n" @@ -574,7 +599,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Simple function to create a csv from our numpy array\n", @@ -589,7 +616,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Function to generate prediction through sample data\n", @@ -625,7 +654,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "### read the saved data for scoring\n", @@ -644,7 +675,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "\n", @@ -664,7 +697,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from sklearn.metrics import roc_auc_score\n", @@ -689,7 +724,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "prefix = 'sagemaker/DEMO-linear' ##subfolder inside the data bucket to be used for Linear Learner\n", @@ -725,7 +762,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "train_file = 'linear_train.data'\n", @@ -740,7 +779,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "validation_file = 'linear_validation.data'\n", @@ -776,21 +817,21 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "linear_containers = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/linear-learner:latest',\n", - " 'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest',\n", - " 'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:latest',\n", - " 'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:latest',\n", - " 'ap-northeast-1': '351501993468.dkr.ecr.ap-northeast-1.amazonaws.com/linear-learner:latest',\n", - " 'ap-northeast-2': '835164637446.dkr.ecr.ap-northeast-2.amazonaws.com/linear-learner:latest'}" + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "container = get_image_uri(boto3.Session().region_name, 'linear-learner')" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "linear_job = 'DEMO-linear-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n", @@ -801,7 +842,7 @@ " \"RoleArn\": role,\n", " \"TrainingJobName\": linear_job,\n", " \"AlgorithmSpecification\": {\n", - " \"TrainingImage\": linear_containers[boto3.Session().region_name],\n", + " \"TrainingImage\": container,\n", " \"TrainingInputMode\": \"File\"\n", " },\n", " \"ResourceConfig\": {\n", @@ -856,7 +897,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print(linear_job)" @@ -872,7 +915,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -906,12 +951,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "\n", "linear_hosting_container = {\n", - " 'Image': linear_containers[boto3.Session().region_name],\n", + " 'Image': container,\n", " 'ModelDataUrl': sm.describe_training_job(TrainingJobName=linear_job)['ModelArtifacts']['S3ModelArtifacts']\n", "}\n", "\n", @@ -938,7 +985,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "linear_endpoint_config = 'DEMO-linear-endpoint-config-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n", @@ -964,7 +1013,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -1004,7 +1055,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def np2csv(arr):\n", @@ -1016,7 +1069,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Function to generate prediction through sample data\n", @@ -1051,7 +1106,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "### Predict on Training Data\n", @@ -1061,7 +1118,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "### Predict on Validation Data\n", @@ -1071,7 +1130,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "### Predict on Test Data\n", @@ -1088,7 +1149,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print(\"Training AUC\", roc_auc_score(train_labels, preds_train_lin)) ##0.9091\n", @@ -1107,7 +1170,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "ens_train = 0.5*np.array(preds_train_xgb) + 0.5*np.array(preds_train_lin);\n", @@ -1127,7 +1192,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "#Print AUC of the combined model\n", @@ -1164,7 +1231,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "final = pd.concat([data_test.iloc[:,0], pd.DataFrame(ens_test)], axis=1)\n", @@ -1181,7 +1250,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sm.delete_endpoint(EndpointName=endpoint_name)\n", diff --git a/introduction_to_applying_machine_learning/linear_time_series_forecast/linear_time_series_forecast.ipynb b/introduction_to_applying_machine_learning/linear_time_series_forecast/linear_time_series_forecast.ipynb index 6369b15bd0..5a5c730374 100644 --- a/introduction_to_applying_machine_learning/linear_time_series_forecast/linear_time_series_forecast.ipynb +++ b/introduction_to_applying_machine_learning/linear_time_series_forecast/linear_time_series_forecast.ipynb @@ -51,6 +51,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "isConfigCell": true }, "outputs": [], @@ -81,6 +82,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "nbpresent": { "id": "bb88eea9-27f3-4e47-9133-663911ea09a9" } @@ -117,6 +119,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "nbpresent": { "id": "78105bc7-ce5d-4003-84f6-4dc5700c5945" } @@ -141,6 +144,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "nbpresent": { "id": "f8976dad-6897-4c7e-8c95-ae2f53070ef5" } @@ -173,6 +177,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "nbpresent": { "id": "6af8d66e-2ef6-4e8d-bb23-d2bd3dbb0b20" } @@ -208,6 +213,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "nbpresent": { "id": "80c0adca-5db2-4152-a9f4-42cbc1dbde84" } @@ -240,7 +246,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "buf = io.BytesIO()\n", @@ -251,7 +259,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "key = 'linear_train.data'\n", @@ -270,7 +280,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "buf = io.BytesIO()\n", @@ -281,7 +293,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "key = 'linear_validation.data'\n", @@ -307,15 +321,13 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "containers = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/linear-learner:latest',\n", - " 'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest',\n", - " 'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:latest',\n", - " 'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:latest',\n", - " 'ap-northeast-1': '351501993468.dkr.ecr.ap-northeast-1.amazonaws.com/linear-learner:latest',\n", - " 'ap-northeast-2': '835164637446.dkr.ecr.ap-northeast-2.amazonaws.com/linear-learner:latest'}" + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "container = get_image_uri(boto3.Session().region_name, 'linear-learner')" ] }, { @@ -334,12 +346,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sess = sagemaker.Session()\n", "\n", - "linear = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],\n", + "linear = sagemaker.estimator.Estimator(container,\n", " role, \n", " train_instance_count=1, \n", " train_instance_type='ml.c4.xlarge',\n", @@ -372,7 +386,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "linear_predictor = linear.deploy(initial_instance_count=1,\n", @@ -408,7 +424,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "gas['thousands_barrels_lag52'] = gas['thousands_barrels'].shift(52)\n", @@ -427,7 +445,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('Naive MdAPE =', np.median(np.abs(test_y - naive) / test_y))\n", @@ -447,7 +467,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "linear_predictor.content_type = 'text/csv'\n", @@ -465,7 +487,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "result = linear_predictor.predict(test_X)\n", @@ -482,7 +506,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('One-step-ahead MdAPE = ', np.median(np.abs(test_y - one_step) / test_y))\n", @@ -504,7 +530,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "multi_step = []\n", @@ -530,7 +558,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('Multi-step-ahead MdAPE =', np.median(np.abs(test_y - multi_step) / test_y))\n", @@ -569,7 +599,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sagemaker.Session().delete_endpoint(linear_predictor.endpoint)" diff --git a/introduction_to_applying_machine_learning/ntm_20newsgroups_topic_modeling/ntm_20newsgroups_topic_model.ipynb b/introduction_to_applying_machine_learning/ntm_20newsgroups_topic_modeling/ntm_20newsgroups_topic_model.ipynb index 64726cbe93..3e5d349a2e 100644 --- a/introduction_to_applying_machine_learning/ntm_20newsgroups_topic_modeling/ntm_20newsgroups_topic_model.ipynb +++ b/introduction_to_applying_machine_learning/ntm_20newsgroups_topic_modeling/ntm_20newsgroups_topic_model.ipynb @@ -88,7 +88,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import os\n", @@ -108,7 +110,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# **Acknowledgements, Copyright Information, and Availability**\n", @@ -130,7 +134,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "!tar -xzf 20_newsgroups.tar.gz\n", @@ -140,7 +146,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "folders = [os.path.join(data_dir,f) for f in sorted(os.listdir(data_dir)) if os.path.isdir(os.path.join(data_dir, f))]\n", @@ -158,7 +166,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from sklearn.datasets.twenty_newsgroups import strip_newsgroup_header, strip_newsgroup_quoting, strip_newsgroup_footer\n", @@ -183,7 +193,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "data[10:13]" @@ -224,7 +236,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "!pip install nltk\n", @@ -253,7 +267,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import time\n", @@ -286,7 +302,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "threshold = 25\n", @@ -305,7 +323,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print(type(vectors), vectors.dtype)\n", @@ -322,7 +342,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import scipy.sparse as sparse\n", @@ -344,7 +366,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "n_train = int(0.8 * vectors.shape[0])\n", @@ -362,7 +386,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print(train_vectors.shape, test_vectors.shape, val_vectors.shape)" @@ -393,7 +419,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import os\n", @@ -430,7 +458,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# update sagemake package, in order to use write_spmatrix_to_sparse_tensor in the next cell\n", @@ -440,7 +470,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def split_convert_upload(sparray, bucket, prefix, fname_template='data_part{}.pbr', n_parts=2):\n", @@ -471,7 +503,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "split_convert_upload(train_vectors, bucket=bucket, prefix=train_prefix, fname_template='train_part{}.pbr', n_parts=8)\n", @@ -500,15 +534,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "containers = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:latest',\n", - " 'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/ntm:latest',\n", - " 'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/ntm:latest',\n", - " 'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/ntm:latest',\n", - " 'ap-northeast-1': '351501993468.dkr.ecr.ap-northeast-1.amazonaws.com/ntm:latest',\n", - " 'ap-northeast-2': '835164637446.dkr.ecr.ap-northeast-2.amazonaws.com/ntm:latest'}" + "import boto3\n", + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "container = get_image_uri(boto3.Session().region_name, 'ntm')" ] }, { @@ -521,13 +554,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import sagemaker\n", - "import boto3\n", "sess = sagemaker.Session()\n", - "ntm = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],\n", + "ntm = sagemaker.estimator.Estimator(container,\n", " role, \n", " train_instance_count=2, \n", " train_instance_type='ml.c4.xlarge',\n", @@ -556,6 +590,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -580,7 +615,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from sagemaker.session import s3_input\n", @@ -598,6 +635,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -619,7 +657,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('Training job name: {}'.format(ntm.latest_training_job.job_name))" @@ -639,7 +679,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "ntm_predictor = ntm.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')" @@ -655,7 +697,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('Endpoint name: {}'.format(ntm_predictor.endpoint))" @@ -681,7 +725,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from sagemaker.predictor import csv_serializer, json_deserializer\n", @@ -701,7 +747,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "test_data = np.array(test_vectors.todense())\n", @@ -732,7 +780,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "predictions = np.array([prediction['topic_weights'] for prediction in results['predictions']])\n", @@ -753,7 +803,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def recordio_protobuf_serializer(spmatrix):\n", @@ -775,7 +827,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "ntm_predictor.content_type = 'application/x-recordio-protobuf'\n", @@ -852,7 +906,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sagemaker.Session().delete_endpoint(ntm_predictor.endpoint)" @@ -878,7 +934,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# If you use conda_mxnet_p36 kernel, mxnet is already installed, otherwise, uncomment the following line to install.\n", @@ -896,7 +954,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "model_path = os.path.join(output_prefix, ntm._current_job_name, 'output/model.tar.gz')\n", @@ -906,7 +966,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "boto3.resource('s3').Bucket(bucket).download_file(model_path, 'downloaded_model.tar.gz')" @@ -915,7 +977,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "!tar -xzvf 'downloaded_model.tar.gz'" @@ -924,7 +988,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# use flag -o to overwrite previous unzipped content\n", @@ -941,7 +1007,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "model = mx.ndarray.load('params')\n", @@ -958,7 +1026,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "!pip install wordcloud\n", @@ -1020,7 +1090,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [] } diff --git a/introduction_to_applying_machine_learning/video_game_sales/video-game-sales-xgboost.ipynb b/introduction_to_applying_machine_learning/video_game_sales/video-game-sales-xgboost.ipynb index b03bb8147e..f9660f7bb9 100644 --- a/introduction_to_applying_machine_learning/video_game_sales/video-game-sales-xgboost.ipynb +++ b/introduction_to_applying_machine_learning/video_game_sales/video-game-sales-xgboost.ipynb @@ -337,21 +337,15 @@ "job_name = 'DEMO-videogames-xgboost-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", "print(\"Training job\", job_name)\n", "\n", - "containers = {\n", - " 'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',\n", - " 'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',\n", - " 'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',\n", - " 'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',\n", - " 'ap-northeast-1': '501404015308.dkr.ecr.ap-northeast-1.amazonaws.com/xgboost:latest',\n", - " 'ap-northeast-2': '306986355934.dkr.ecr.ap-northeast-2.amazonaws.com/xgboost:latest'\n", - " }\n", + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "container = get_image_uri(boto3.Session().region_name, 'xgboost')\n", "\n", "create_training_params = \\\n", "{\n", " \"RoleArn\": role,\n", " \"TrainingJobName\": job_name,\n", " \"AlgorithmSpecification\": {\n", - " \"TrainingImage\": containers[boto3.Session().region_name],\n", + " \"TrainingImage\": container,\n", " \"TrainingInputMode\": \"File\"\n", " },\n", " \"ResourceConfig\": {\n", @@ -456,7 +450,7 @@ " ModelName=job_name,\n", " ExecutionRoleArn=role,\n", " PrimaryContainer={\n", - " 'Image': containers[boto3.Session().region_name],\n", + " 'Image': container,\n", " 'ModelDataUrl': sm.describe_training_job(TrainingJobName=job_name)['ModelArtifacts']['S3ModelArtifacts']})\n", "\n", "print(create_model_response['ModelArn'])" diff --git a/introduction_to_applying_machine_learning/xgboost_customer_churn/xgboost_customer_churn.ipynb b/introduction_to_applying_machine_learning/xgboost_customer_churn/xgboost_customer_churn.ipynb index cea51f864e..5f6cacc92d 100644 --- a/introduction_to_applying_machine_learning/xgboost_customer_churn/xgboost_customer_churn.ipynb +++ b/introduction_to_applying_machine_learning/xgboost_customer_churn/xgboost_customer_churn.ipynb @@ -108,7 +108,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "!wget http://dataminingconsultant.com/DKD2e_data_sets.zip\n", @@ -118,7 +120,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "churn = pd.read_csv('./Data sets/churn.txt')\n", @@ -156,7 +160,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Frequency tables for each categorical feature\n", @@ -202,7 +208,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "for column in churn.select_dtypes(include=['object']).columns:\n", @@ -232,7 +240,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "display(churn.corr())\n", @@ -340,12 +350,8 @@ }, "outputs": [], "source": [ - "containers = {'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',\n", - " 'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',\n", - " 'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',\n", - " 'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',\n", - " 'ap-northeast-1': '501404015308.dkr.ecr.ap-northeast-1.amazonaws.com/xgboost:latest',\n", - " 'ap-northeast-2': '306986355934.dkr.ecr.ap-northeast-2.amazonaws.com/xgboost:latest'}" + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "container = get_image_uri(boto3.Session().region_name, 'xgboost')" ] }, { @@ -358,7 +364,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train'.format(bucket, prefix), content_type='csv')\n", @@ -382,12 +390,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sess = sagemaker.Session()\n", "\n", - "xgb = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],\n", + "xgb = sagemaker.estimator.Estimator(container,\n", " role, \n", " train_instance_count=1, \n", " train_instance_type='ml.m4.xlarge',\n", @@ -418,7 +428,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "xgb_predictor = xgb.deploy(initial_instance_count=1,\n", @@ -488,7 +500,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "pd.crosstab(index=test_data.iloc[:, 0], columns=np.round(predictions), rownames=['actual'], colnames=['predictions'])" @@ -510,7 +524,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "plt.hist(predictions)\n", @@ -527,7 +543,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "pd.crosstab(index=test_data.iloc[:, 0], columns=np.where(predictions > 0.3, 1, 0))" @@ -576,7 +594,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "cutoffs = np.arange(0.01, 1, 0.01)\n", @@ -628,7 +648,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sagemaker.Session().delete_endpoint(xgb_predictor.endpoint)" diff --git a/introduction_to_applying_machine_learning/xgboost_direct_marketing/xgboost_direct_marketing_sagemaker.ipynb b/introduction_to_applying_machine_learning/xgboost_direct_marketing/xgboost_direct_marketing_sagemaker.ipynb index 4181979d88..70a62f229c 100644 --- a/introduction_to_applying_machine_learning/xgboost_direct_marketing/xgboost_direct_marketing_sagemaker.ipynb +++ b/introduction_to_applying_machine_learning/xgboost_direct_marketing/xgboost_direct_marketing_sagemaker.ipynb @@ -111,7 +111,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip\n", @@ -128,7 +130,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "data = pd.read_csv('./bank-additional/bank-additional-full.csv', sep=';')\n", @@ -195,6 +199,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": false }, "outputs": [], @@ -232,6 +237,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": false }, "outputs": [], @@ -261,7 +267,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "display(data.corr())\n", @@ -414,12 +422,8 @@ }, "outputs": [], "source": [ - "containers = {'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',\n", - " 'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',\n", - " 'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',\n", - " 'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',\n", - " 'ap-northeast-1': '501404015308.dkr.ecr.ap-northeast-1.amazonaws.com/xgboost:latest',\n", - " 'ap-northeast-2': '306986355934.dkr.ecr.ap-northeast-2.amazonaws.com/xgboost:latest'}" + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "container = get_image_uri(boto3.Session().region_name, 'xgboost')" ] }, { @@ -459,12 +463,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sess = sagemaker.Session()\n", "\n", - "xgb = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],\n", + "xgb = sagemaker.estimator.Estimator(container,\n", " role, \n", " train_instance_count=1, \n", " train_instance_type='ml.m4.xlarge',\n", diff --git a/sagemaker-python-sdk/1P_kmeans_lowlevel/kmeans_mnist_lowlevel.ipynb b/sagemaker-python-sdk/1P_kmeans_lowlevel/kmeans_mnist_lowlevel.ipynb index d7c1448542..31f748fbf6 100644 --- a/sagemaker-python-sdk/1P_kmeans_lowlevel/kmeans_mnist_lowlevel.ipynb +++ b/sagemaker-python-sdk/1P_kmeans_lowlevel/kmeans_mnist_lowlevel.ipynb @@ -51,6 +51,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "isConfigCell": true }, "outputs": [], @@ -75,7 +76,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -99,7 +102,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -132,7 +137,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -166,7 +173,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -176,13 +185,8 @@ "job_name = 'kmeans-lowlevel-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", "print(\"Training job\", job_name)\n", "\n", - "images = {'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:latest',\n", - " 'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:latest',\n", - " 'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/kmeans:latest',\n", - " 'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/kmeans:latest',\n", - " 'ap-northeast-1': '351501993468.dkr.ecr.ap-northeast-1.amazonaws.com/kmeans:latest',\n", - " 'ap-northeast-2': '835164637446.dkr.ecr.ap-northeast-2.amazonaws.com/kmeans:latest'}\n", - "image = images[boto3.Session().region_name]\n", + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "image = get_image_uri(boto3.Session().region_name, 'kmeans')\n", "\n", "output_location = 's3://{}/kmeans_example/output'.format(bucket)\n", "print('training artifacts will be uploaded to: {}'.format(output_location))\n", @@ -261,7 +265,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -299,7 +305,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from time import gmtime, strftime\n", @@ -328,7 +336,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -370,7 +380,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Simple function to create a csv from our numpy array\n", @@ -383,7 +395,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "runtime = boto3.Session().client('runtime.sagemaker')" @@ -392,7 +406,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import json\n", @@ -418,7 +434,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time \n", @@ -467,7 +485,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sagemaker.delete_endpoint(EndpointName=endpoint_name)" diff --git a/scientific_details_of_algorithms/lda_topic_modeling/LDA-Science.ipynb b/scientific_details_of_algorithms/lda_topic_modeling/LDA-Science.ipynb index c316f0735a..221dd514ed 100644 --- a/scientific_details_of_algorithms/lda_topic_modeling/LDA-Science.ipynb +++ b/scientific_details_of_algorithms/lda_topic_modeling/LDA-Science.ipynb @@ -35,7 +35,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "!conda install -y scipy" @@ -44,7 +46,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -95,6 +99,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "isConfigCell": true }, "outputs": [], @@ -158,7 +163,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('Generating example data...')\n", @@ -192,7 +199,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('First training document =\\n{}'.format(documents_training[0]))\n", @@ -203,7 +212,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "average_document_length = documents.sum(axis=1).mean()\n", @@ -224,7 +235,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('First topic =\\n{}'.format(known_beta[0]))\n", @@ -245,7 +258,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('Topic #1:\\n{}'.format(known_beta[0]))\n", @@ -264,7 +279,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -286,7 +303,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -353,7 +372,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('First training document =\\n{}'.format(documents_training[0]))\n", @@ -364,7 +385,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('First training document topic mixture =\\n{}'.format(topic_mixtures_training[0]))\n", @@ -382,7 +405,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -406,7 +431,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -428,7 +455,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -489,7 +518,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# convert documents_training to Protobuf RecordIO format\n", @@ -515,19 +546,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "containers = {\n", - " 'us-west-2': '266724342769.dkr.ecr.us-west-2.amazonaws.com/lda:latest',\n", - " 'us-east-1': '766337827248.dkr.ecr.us-east-1.amazonaws.com/lda:latest',\n", - " 'us-east-2': '999911452149.dkr.ecr.us-east-2.amazonaws.com/lda:latest',\n", - " 'eu-west-1': '999678624901.dkr.ecr.eu-west-1.amazonaws.com/lda:latest',\n", - " 'ap-northeast-1': '258307448986.dkr.ecr.ap-northeast-1.amazonaws.com/lda:latest',\n", - " 'ap-northeast-2': '293181348795.dkr.ecr.ap-northeast-2.amazonaws.com/lda:latest'\n", - "}\n", + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", "region_name = boto3.Session().region_name\n", - "container = containers[region_name]\n", + "container = get_image_uri(boto3.Session().region_name, 'lda')\n", "\n", "print('Using SageMaker LDA container: {} ({})'.format(container, region_name))" ] @@ -573,7 +600,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "session = sagemaker.Session()\n", @@ -614,7 +643,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('Training job name: {}'.format(lda.latest_training_job.job_name))" @@ -638,7 +669,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# download and extract the model file from S3\n", @@ -678,7 +711,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "permutation, learned_beta = match_estimated_topics(known_beta, learned_beta_permuted)\n", @@ -720,7 +755,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "lda_inference = lda.deploy(\n", @@ -739,7 +776,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print('Endpoint name: {}'.format(lda_inference.endpoint))" @@ -777,7 +816,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "results = lda_inference.predict(documents_test[:12])\n", @@ -808,7 +849,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "inferred_topic_mixtures_permuted = np.array([prediction['topic_mixture'] for prediction in results['predictions']])\n", @@ -828,7 +871,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "inferred_topic_mixtures = inferred_topic_mixtures_permuted[:,permutation]\n", @@ -846,7 +891,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -893,7 +940,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%%time\n", @@ -923,7 +972,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -993,7 +1044,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", @@ -1006,7 +1059,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%matplotlib inline\n", diff --git a/scientific_details_of_algorithms/linear_learner_class_weights_loss_functions/linear_learner_class_weights_loss_functions.ipynb b/scientific_details_of_algorithms/linear_learner_class_weights_loss_functions/linear_learner_class_weights_loss_functions.ipynb index 5cfdbb30de..201dd404fa 100644 --- a/scientific_details_of_algorithms/linear_learner_class_weights_loss_functions/linear_learner_class_weights_loss_functions.ipynb +++ b/scientific_details_of_algorithms/linear_learner_class_weights_loss_functions/linear_learner_class_weights_loss_functions.ipynb @@ -510,19 +510,16 @@ }, "outputs": [], "source": [ + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", "def predictor_from_hyperparams(s3_train_data, hyperparams, output_path):\n", " \"\"\"\n", " Create an Estimator from the given hyperparams, fit to training data, and return a deployed predictor\n", " \"\"\"\n", " # specify algorithm containers and instantiate an Estimator with given hyperparams\n", - " containers = {\n", - " 'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/linear-learner:latest',\n", - " 'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest',\n", - " 'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:latest',\n", - " 'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:latest',\n", - " 'ap-northeast-1': '351501993468.dkr.ecr.ap-northeast-1.amazonaws.com/linear-learner:latest',\n", - " 'ap-northeast-2': '835164637446.dkr.ecr.ap-northeast-2.amazonaws.com/linear-learner:latest'}\n", - " linear = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],\n", + " container = get_image_uri(boto3.Session().region_name, 'linear-learner')\n", + "\n", + " linear = sagemaker.estimator.Estimator(container,\n", " role,\n", " train_instance_count=1,\n", " train_instance_type='ml.m4.xlarge',\n",