Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
Additional content for leadership mentors and transcriptions changed …
Browse files Browse the repository at this point in the history
…from Watson to AWS
  • Loading branch information
beatthat authored Jan 21, 2020
1 parent a3a2dcc commit 39e3ba8
Show file tree
Hide file tree
Showing 949 changed files with 29,755 additions and 15,448 deletions.
2 changes: 1 addition & 1 deletion checkpoint/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CHECKPOINT_ROOT=$(shell pwd)
PROJECT_ROOT?=$(shell git rev-parse --show-toplevel 2> /dev/null)
CHECKPOINT?=2019-11-09-0031
CHECKPOINT?=2019-11-14-2031
MENTOR_ROOT=$(PROJECT_ROOT)/mentors/data/mentors
ARCH?=lstm_v1
MENTOR?=clint
Expand Down
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
71 changes: 47 additions & 24 deletions mentors/Makefile
Original file line number Diff line number Diff line change
@@ -1,16 +1,53 @@
PWD=$(shell pwd)
DOCKER_IMAGE?=uscictdocker/mentor-pipeline:1.1.0
DOCKER_IMAGE?=uscictdocker/mentor-pipeline:1.3.0
DOCKER_CONTAINER=mentor-pipeline
PROJECT_ROOT?=$(shell git rev-parse --show-toplevel 2> /dev/null)
WATSON_CREDENTIALS=secrets/watson_credentials.txt
WATSON_USERNAME?=$(shell if [ -f $(WATSON_CREDENTIALS) ]; then head -n 1 $(WATSON_CREDENTIALS); else echo ""; fi)
WATSON_PASSWORD?=$(shell if [ -f $(WATSON_CREDENTIALS) ]; then tail -n 1 $(WATSON_CREDENTIALS); else echo ""; fi)

AWS_REGION?=us-east-1
TRANSCRIBE_AWS_S3_BUCKET_SOURCE?=mentorpal-transcribe-source
TRANSCRIBE_MODULE_PATH?=transcribe_aws
DEV_ENABLED?=
DEV_ROOT?=$(shell cd ~/projects && pwd 2> /dev/null)
DEV_MENTOR_PIPELINE?=$(shell cd $(DEV_ROOT)/mentor-pipeline && pwd 2> /dev/null)
DEV_TRANSCRIBE?=$(shell cd $(DEV_ROOT)/py-transcribe && pwd 2> /dev/null)
DEV_TRANSCRIBE_AWS?=$(shell cd $(DEV_ROOT)/py-transcribe-aws && pwd 2> /dev/null)
DOCKER_PYTHON_VERSION=3.7
DOCKER_SITE_PACKAGES=/usr/local/lib/python$(DOCKER_PYTHON_VERSION)/site-packages
DOCKER_ENV_ARGS=\
-e AWS_REGION=$(AWS_REGION) \
-e AWS_ACCESS_KEY_ID=$(AWS_ACCESS_KEY_ID) \
-e AWS_SECRET_ACCESS_KEY=$(AWS_SECRET_ACCESS_KEY) \
-e TRANSCRIBE_AWS_S3_BUCKET_SOURCE=$(TRANSCRIBE_AWS_S3_BUCKET_SOURCE) \
-e TRANSCRIBE_MODULE_PATH=$(TRANSCRIBE_MODULE_PATH)
DOCKER_VOLUME_ARGS_DATA_AND_VIDEO=\
-v $(PWD)/data:/app/mounts/data \
-v $(PWD)/videos:/app/mounts/videos
DOCKER_VOLUME_ARGS_DEV=
ifeq ("$(DEV_ENABLED)", "1")
ifneq ("$(DEV_TRANSCRIBE)", "")
DOCKER_VOLUME_ARGS_DEV += -v $(DEV_TRANSCRIBE)/transcribe:$(DOCKER_SITE_PACKAGES)/transcribe
endif
ifneq ("$(DEV_TRANSCRIBE_AWS)", "")
DOCKER_VOLUME_ARGS_DEV += -v $(DEV_TRANSCRIBE_AWS)/transcribe_aws:$(DOCKER_SITE_PACKAGES)/transcribe_aws
endif
ifneq ("$(DEV_MENTOR_PIPELINE)", "")
DOCKER_VOLUME_ARGS_DEV += -v $(DEV_MENTOR_PIPELINE)/mentor_pipeline:/app/mentor_pipeline
endif
endif
DOCKER_ARGS=\
$(DOCKER_ENV_ARGS) \
$(DOCKER_VOLUME_ARGS_DATA_AND_VIDEO) \
$(DOCKER_VOLUME_ARGS_DEV)
# virtualenv used for pytest
VENV=.venv
$(VENV):
$(MAKE) venv-create

et:
@echo "DEV_TRANSCRIBE=$(DEV_TRANSCRIBE)"

ev:
@echo "DOCKER_VOLUME_ARGS_DEV=$(DOCKER_VOLUME_ARGS_DEV)"

.PHONY: venv-create
venv-create: virtualenv-installed
[ -d $(VENV) ] || virtualenv -p python3 $(VENV)
Expand All @@ -21,11 +58,6 @@ venv-create: virtualenv-installed
virtualenv-installed:
$(PROJECT_ROOT)/bin/virtualenv_ensure_installed.sh

$(WATSON_CREDENTIALS):
@echo "SET_USERNAME_HERE" > $(WATSON_CREDENTIALS)
@echo "SET_PASSWORD_HERE" >> $(WATSON_CREDENTIALS)
chmod 600 $(WATSON_CREDENTIALS)

# Removes single mentor's data files from the local file system
.PHONY: data/mentors/%/clean
data/mentors/%/clean:
Expand All @@ -47,16 +79,13 @@ clean:

# Runs a shell inside the data processing pipeline dockerfile
.PHONY shell:
shell: $(WATSON_CREDENTIALS)
shell:
docker run \
-it \
--rm \
--name $(DOCKER_CONTAINER) \
-e WATSON_USERNAME=$(WATSON_USERNAME) \
-e WATSON_PASSWORD=$(WATSON_PASSWORD) \
--entrypoint /bin/bash \
-v $(PWD)/data:/app/mounts/data \
-v $(PWD)/videos:/app/mounts/videos \
$(DOCKER_ARGS) \
$(DOCKER_IMAGE)


Expand All @@ -65,23 +94,19 @@ shell: $(WATSON_CREDENTIALS)
# Generates data files
# TODO: 1) log every significant action (generating audio, transcribing), 2) build classifier for jd, 3) utterance yaml gets error codes, 4) make delete audio files that failed to transcribe
.PHONY: data/mentors-%
data/mentors-%: $(WATSON_CREDENTIALS)
data/mentors-%:
docker run \
--rm \
--name $(DOCKER_CONTAINER) \
-v $(PWD)/data:/app/mounts/data \
-e WATSON_USERNAME=$(WATSON_USERNAME) \
-e WATSON_PASSWORD=$(WATSON_PASSWORD) \
$(DOCKER_IMAGE) --mentor $* --data-update --data=/app/mounts/data/mentors
$(DOCKER_ARGS) \
$(DOCKER_IMAGE) --mentor $* --data-update --data=/app/mounts/data/mentors $(args)

.PHONY: data/topics_by_question.csv/mentor/%
data/topics_by_question.csv/mentors/%:
docker run \
--rm \
--name $(DOCKER_CONTAINER) \
-v $(PWD)/data:/app/mounts/data \
-e WATSON_USERNAME=$(WATSON_USERNAME) \
-e WATSON_PASSWORD=$(WATSON_PASSWORD) \
$(DOCKER_IMAGE) --mentor $* --topics-by-question-generate --data=/app/mounts/data/mentors


Expand All @@ -92,8 +117,6 @@ videos/mentors/%: data/mentors/%
--name $(DOCKER_CONTAINER) \
-v $(PWD)/data:/app/mounts/data \
-v $(PWD)/videos:/app/mounts/videos \
-e WATSON_USERNAME=$(WATSON_USERNAME) \
-e WATSON_PASSWORD=$(WATSON_PASSWORD) \
$(DOCKER_IMAGE) --mentor $* --videos-update --data=/app/mounts/data/mentors

# Build checkpoint from mentor data
Expand Down
98 changes: 97 additions & 1 deletion mentors/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ data/mentors/my_new_mentor/
│   │   │   ├── p001-more-questions.mp4
```

#### Configure AWS Credentials

TODO:
- figure out a better way to handle AWS credentials for running locally
- add more specific details about required permissions for the AWS IAM

The pipeline uses AWS Transcribe, and for now, you MUST set your AWS credentials in the environment — for example, export them in your shell:

```bash
export AWS_ACCESS_KEY_ID=<your iam id>
export AWS_SECRET_ACCESS_KEY=<your iam key>
```

...or just pass both of the above to every make call.


#### Build and Test a Mentor

If raw video, audio and timestamp files for a mentor are stored in S3 (more on this
Expand All @@ -31,7 +47,7 @@ Note that videos are not required to generate a classifier to a new mentor.

##### Create/update the training data
```bash
make data/mentors/{mentor_id}
make data/mentors-{mentor_id}
```

##### Train {mentor} classifier
Expand Down Expand Up @@ -77,3 +93,83 @@ for each question. Timestamp files should be in a CSV file of the following form
|----------|------------------|----------|----------------------|----------------------|
| (string) | (char: A/U) | (string) | (timestamp HH:MM:SS) | (timestamp HH:MM:SS) |

DEV
---

### Make/ENV variables for DEV

When you're working on the mentor-pipeline tool set, you frequently want to run `make` rules with a local build of the `mentor-pipeline` docker image and/or local copies of the python source for its python modules. The `Makefile` has a number of variables to support local development.



**NOTE**

All the examples below prepend variables to a `make` call, but you can also always `export` any of these variables once and they will stay in effect for the remainder of your shell session, e.g.

```bash
export DOCKER_IMAGE=mentor-pipeline:latest
# then later ...
make shell
```

...instead of

```bash
DOCKER_IMAGE=mentor-pipeline:latest make shell
```

#### DOCKER_IMAGE

Change the `mentor-pipeline` docker image from the current published release, e.g.

Change the docker image for a single make call like this

```bash
DOCKER_IMAGE=mentor-pipeline:latest make shell
```

...or configure it for your shell session like this

```bash
export DOCKER_IMAGE=mentor-pipeline:latest
```

#### DEV_ENABLED

Set `DEV_ENABLED` to have `make` rules run with local source for python modules. Will only use local sources for specific modules if the source is found at default (or configured) paths (details below)

```
DEV_ENABLED=1 make shell
```

#### DEV_ROOT

A default root for all python modules in dev. If you have set `DEV_ENABLED=1` and any of the python modules listed below are cloned there, they will automatically be included. The default value for `DEV_ROOT` is `~/projects`

#### DEV_MENTOR_PIPELINE

Override the path to where [mentor-pipeline](https://github.com/ICTLearningSciences/mentor-pipeline) is cloned. ***NOTE*** source will only be used if `DEV_ENABLED=1`

#### DEV_TRANSCRIBE

Override the path to where [py-transcribe](https://github.com/ICTLearningSciences/py-transcribe) is cloned. ***NOTE*** source will only be used if `DEV_ENABLED=1`

#### DEV_TRANSCRIBE_AWS

Override the path to where [py-transcribe-aws](https://github.com/ICTLearningSciences/py-transcribe-aws) is cloned. ***NOTE*** source will only be used if `DEV_ENABLED=1`

### Running mentor-pipeline docker shell

You can open a shell to the pipeline docker image like this:

```bash
make shell
```

All dev variables described above will apply to the shell.

Once in the docker shell, you can run the pipeline script directly, e.g.

```
python mentor_pipeline_runner.py --mentor some_mentor_id --data-update --data=/app/mounts/data/mentors
```
Binary file removed mentors/data/mentors/carlos/data/npceditor_data.xlsx
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed mentors/data/mentors/clint/data/npceditor_data.xlsx
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 39e3ba8

Please sign in to comment.