First local training and s3 copy
fwhigh committed Jan 9, 2019
1 parent 0129142 commit c1758c8
Showing 12 changed files with 90 additions and 350 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,5 +1,7 @@
.idea
data
batch_create_compute_env.json
*~

# Byte-compiled / optimized / DLL files
__pycache__/
6 changes: 3 additions & 3 deletions Dockerfile
@@ -1,5 +1,5 @@

FROM 015216235264.dkr.ecr.us-west-1.amazonaws.com/pmip:latest

EXPOSE 8000
ENTRYPOINT []
CMD [ "gunicorn", "-w", "3", "-b", ":8000", "wsgi" ]
ENTRYPOINT [ "bash" ]
CMD [ "-c", "gunicorn -w 3 -b :8000 wsgi" ]
2 changes: 0 additions & 2 deletions Dockerfile.train
@@ -17,8 +17,6 @@ COPY . .

RUN ENVIRONMENT=$ENVIRONMENT bash scripts/install.sh

RUN bash scripts/get_glove.sh

EXPOSE 8888
ENTRYPOINT [ "bash" ]
CMD [ "-c", "jupyter notebook notebooks/ --allow-root --ip=0.0.0.0 --port=8888 --no-browser" ]
20 changes: 14 additions & 6 deletions README.md
@@ -6,15 +6,13 @@ Predictive Models in Production

### Pro tips

If you ever find yourself with a "no space left on device" error, try
If you ever find yourself with a "no space left on device" error when building the Docker image, try

```bash
docker rm $(docker ps -q -f 'status=exited')
docker rmi $(docker images -q -f "dangling=true")
```

See, e.g., https://forums.docker.com/t/no-space-left-on-device-error/10894/14.
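
Recent Docker releases also bundle this cleanup into a single command, which may be simpler:

```bash
# Removes stopped containers, unused networks, and dangling images
# (see `docker system prune --help` for what your Docker version includes)
docker system prune
```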

### Build the base training image

```bash
@@ -24,17 +22,27 @@ ENVIRONMENT=dev bash scripts/build_training_image.sh
### Do interactive model training and data exploration in the Jupyter notebook

```bash
ENVIRONMENT=dev bash scripts/run_training_container.sh
ENVIRONMENT=dev bash scripts/run_training_container.sh -c jupyter notebook notebooks/ --allow-root --ip=0.0.0.0 --port=8888 --no-browser
```

Then open [http://localhost:8888](http://localhost:8888).
Then open [http://localhost:8888](http://localhost:8888) to run Jupyter.

### Train a model programmatically

```bash
ENVIRONMENT=dev RUNID=`date +%Y%m%d` bash scripts/run_training_container.sh scripts/train.sh
ENVIRONMENT=dev bash scripts/run_training_container.sh scripts/train.sh
```

### Pushing the new Docker image to production for the training and API services

If this is your first and only ECR repo, then run

```bash
bash scripts/push_image.sh $(aws ecr describe-repositories | jq -r '.repositories[0].repositoryUri')
```

If you have multiple ECR repos, you'll have to change the argument so that it points to the one you want to push to.
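
For example, a minimal sketch that selects a repository by name (assuming a repository named `pmip`; substitute your own repository name):

```bash
# Push to the ECR repository named "pmip" (adjust the name to the repo you want)
bash scripts/push_image.sh $(aws ecr describe-repositories \
  | jq -r '.repositories[] | select(.repositoryName == "pmip") | .repositoryUri')
```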

## Resources

* https://github.com/pypa/sampleproject/blob/master/setup.py
335 changes: 23 additions & 312 deletions notebooks/model-training.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion requirements.txt
@@ -5,4 +5,5 @@ xgboost
sklearn
nltk
jupyter
papermill
papermill
awscli
2 changes: 1 addition & 1 deletion scripts/get_glove.sh
@@ -6,4 +6,4 @@ if [ ! -f "data/glove/glove.840B.300d.txt" ]; then
mv glove.840B.300d.zip data/glove
unzip data/glove/glove.840B.300d.zip -d data/glove
rm data/glove/glove.840B.300d.zip
figo
fi
15 changes: 15 additions & 0 deletions scripts/get_latest_model.sh
@@ -0,0 +1,15 @@
#!/usr/bin/env bash

set -e

BUCKET="s3://fwhigh-predictive-models"
MODEL_ID=$(aws s3 ls ${BUCKET}/models/ | awk '$1~/PRE/ {print $2}' | sed 's/\///g' | sort -nr | head -n 1)
S3_DIR=$BUCKET/models/$MODEL_ID
DIR=data

echo Getting data from $S3_DIR
echo Writing it to $DIR

mkdir -p $DIR

aws s3 cp --recursive --exclude "*" --include "model.pkl" $S3_DIR/ $DIR/
10 changes: 6 additions & 4 deletions scripts/get_training_data.sh
@@ -1,9 +1,11 @@
#!/usr/bin/env bash

set -e
S3_DIR=$1
DIR=$2

$DIR=$1
echo Getting data from $S3_DIR
echo Writing it to $DIR

aws s3 cp --recursive $S3_DIR/ $DIR/
cd $DIR
wget https://archive.ics.uci.edu/ml/machine-learning-databases/00380/YouTube-Spam-Collection-v1.zip
unzip YouTube-Spam-Collection-v1.zip
unzip -v *.zip
11 changes: 1 addition & 10 deletions scripts/install.sh
@@ -1,12 +1,3 @@
#!/usr/bin/env bash

if [ "$ENVIRONMENT" == "prod" ]; then
echo "Installing pmip package in prod environment"
pip install -U --upgrade-strategy only-if-needed -e .
elif [ "$ENVIRONMENT" == "staging" ]; then
echo "Installing pmip package in staging environment"
pip install -U --upgrade-strategy only-if-needed .
else
echo "Installing pmip package in dev environment"
pip install -U --upgrade-strategy only-if-needed .
fi
pip install -U --upgrade-strategy only-if-needed -e .
10 changes: 10 additions & 0 deletions scripts/push_image.sh
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

ECR_URI=$1
AWS_REGION=$(aws configure get region)

IMAGE_TAG=latest

$(aws ecr get-login --no-include-email --region $AWS_REGION)
docker tag pmip:staging ${ECR_URI}:${IMAGE_TAG}
docker push ${ECR_URI}:${IMAGE_TAG}
24 changes: 13 additions & 11 deletions scripts/train.sh
@@ -2,26 +2,28 @@

set -e

RUNID=`date +%Y%m%d`
DATA_DIR=data/${RUNID}
BUCKET="s3://predictive-models"
S3_DATA_DIR=${BUCKET}/${RUNID}
BUCKET="s3://fwhigh-predictive-models"
MODEL_ID=`date +%Y%m%d`
S3_DIR=$BUCKET/models/$MODEL_ID
DIR=data

mkdir -p DATA_DIR
TRAINING_ID=$(aws s3 ls $BUCKET/training/ | awk '$1~/PRE/ {print $2}' | sed 's/\///g' | sort -nr | head -n 1)

mkdir -p $DIR

# Get the data. Replace this line with something like:
# aws s3 cp $BUCKET/training-data/ $DATA_DIR/
# aws s3 cp $BUCKET/training-data/ $DIR/
# to train on new data that's placed into S3 directly.
bash scripts/get_training_data.sh ${DATA_DIR}
bash scripts/get_training_data.sh $BUCKET/training/$TRAINING_ID $DIR

# Train the model
papermill notebooks/model-training.ipynb ${DATA_DIR}/model-training-${RUNID}.ipynb \
-p RUNID ${RUNID} -p DATA_DIR ${DATA_DIR}
papermill notebooks/model-training.ipynb $DIR/model-training-$MODEL_ID.ipynb -p DATA_DIR $DIR

# Convert the notebook into HTML
jupyter nbconvert --to html ${DATA_DIR}/model-training-${RUNID}.ipynb
jupyter nbconvert --to html $DIR/model-training-$MODEL_ID.ipynb

# Push any assets to the cloud
if [ "$ENVIRONMENT" == "staging" ]; then
aws s3 cp --exclude * --include *.ipynb *.html *.pkl ${DATA_DIR}/ S3_DATA_DIR/
echo Pushing model to S3
aws s3 cp --recursive --exclude "*" --include "*.ipynb" --include "*.html" --include "*.pkl" $DIR/ $S3_DIR/
fi
