Module 3 practice (#8)
truskovskiyk authored Jul 11, 2024
1 parent e8997ee commit 9141fa7
Showing 41 changed files with 186 additions and 1,756 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/module-1-advanced.yaml
@@ -9,6 +9,8 @@ on:
pull_request:
branches:
- main
paths:
- 'module-1/**'

jobs:

3 changes: 3 additions & 0 deletions .github/workflows/module-1-basic.yaml
@@ -9,6 +9,9 @@ on:
pull_request:
branches:
- main
paths:
- 'module-1/**'

env:
IMAGE_ML_APP: app-ml
IMAGE_ML_WEB: app-web
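For context, the change to both module-1 workflows adds a `paths` filter to the `pull_request` trigger, so these jobs run only when files under `module-1/` change. A minimal sketch of the resulting trigger block, assuming the surrounding keys are otherwise unchanged:

```
# Hypothetical excerpt of the module-1 workflow trigger after this change.
# Only the pull_request section is shown; other triggers are omitted.
on:
  pull_request:
    branches:
      - main
    paths:
      - 'module-1/**'   # skip CI when the PR touches nothing in module-1/
```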
7 changes: 5 additions & 2 deletions .github/workflows/module-2.yaml
@@ -3,12 +3,15 @@ name: Module 2

on:

pull_request:
push:
branches:
- main
push:

pull_request:
branches:
- main
paths:
- 'module-2/**'

jobs:

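The module-2 workflow gets the same treatment, and the `push` and `pull_request` triggers are reordered. Presumably the full `on:` block now reads roughly as follows (a sketch assembled from the diff fragments above, not the complete file):

```
# Hypothetical reconstruction of the module-2.yaml triggers after this change.
on:
  push:
    branches:
      - main            # pushes to main still run unconditionally
  pull_request:
    branches:
      - main
    paths:
      - 'module-2/**'   # PRs run only when module-2 files change
```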
71 changes: 16 additions & 55 deletions .github/workflows/module-3.yaml
@@ -1,7 +1,14 @@
name: Module 3

on:
workflow_dispatch:
push:
branches:
- main

pull_request:
branches:
- main


env:
IMAGE_MAIN_NAME: nlp-sample
@@ -15,21 +22,21 @@ jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Checkout
uses: actions/checkout@v4

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

# See explanation: https://github.com/orgs/community/discussions/25678
- name: Clean disk
run: |
rm -rf /opt/hostedtoolcache
- name: Build new
run: |
docker build -f week-3/nlp-sample/Dockerfile -t nlp-sample:latest week-3/nlp-sample
docker build -f module-3/nlp-sample/Dockerfile -t nlp-sample:latest module-3/nlp-sample
- name: Test style
run: |
@@ -60,49 +67,3 @@ jobs:
# tags: ${{ secrets.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_MAIN_NAME }}:${{ env.IMAGE_MAIN_TAG }}
# cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_MAIN_NAME }}:buildcache
# cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_MAIN_NAME }}:buildcache,mode=max

cml-test:

runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- uses: iterative/setup-cml@v1

- name: Train model
run: |
docker build -f week-3/nlp-sample/Dockerfile -t nlp-sample:latest week-3/nlp-sample
docker run -v $PWD:/tmp/results -e WANDB_PROJECT=${{ secrets.WANDB_PROJECT }} -e WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} nlp-sample:latest make train_fast_ci
- name: Write CML report
env:
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Post reports as comments in GitHub PRs
# cat results.txt >> report.md
cml send-comment README.md
build-push-aim:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Build
uses: docker/build-push-action@v2
with:
context: week-3/aim/
file: week-3/aim//Dockerfile
push: true
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_AIM_NAME }}:${{ env.IMAGE_AIM_TAG }}
cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_AIM_NAME }}:buildcache
cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_AIM_NAME }}:buildcache,mode=max
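In module-3.yaml, the `cml-test` and `build-push-aim` jobs shown above appear to be removed, checkout is bumped from v2 to v4, a disk-clean step is added (see the linked community discussion), and the build path moves from `week-3/` to `module-3/`. A sketch of how the remaining `test` job presumably begins, with step order assumed from the diff fragments:

```
# Hypothetical excerpt of the trimmed test job; later steps (style checks, etc.) omitted.
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Free disk space on the hosted runner so large Docker builds do not fail.
      # See https://github.com/orgs/community/discussions/25678
      - name: Clean disk
        run: |
          rm -rf /opt/hostedtoolcache

      - name: Build new
        run: |
          docker build -f module-3/nlp-sample/Dockerfile -t nlp-sample:latest module-3/nlp-sample
```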
16 changes: 6 additions & 10 deletions module-3/PRACTICE.md
@@ -1,10 +1,10 @@
# Practice
# Practice

***
***

# H5: Training & Experiments

## Reading list:
## Reading list:

- [The Data Science Lifecycle Process](https://github.com/dslp/dslp#the-data-science-lifecycle-process)
- [Structuring Your Project](https://docs.python-guide.org/writing/structure/)
@@ -39,15 +39,12 @@ You need to have a training pipeline for your model for this homework. You can t
- PR6: Write code for distributed training with PyTorch, Accelerate, and Ray.
- Public link to your W&B project with experiments.

## Criteria:

## Criteria:

- 6 PRs are merged.
- 6 PRs are merged.
- W&B project created.
- Description of experiment section in the google doc.



# H6: Testing & CI

## Reading list:
@@ -67,7 +64,6 @@ You need to have a training pipeline for your model for this homework. You can t
- [Privacy Testing for Deep Learning](https://github.com/trailofbits/PrivacyRaven)
- [Learning Interpretability Tool (LIT)](https://github.com/PAIR-code/lit)


## Task:

You need to have a training pipeline for your model for this homework. You can take it from your test task for this course, bring your own or use this [code](https://github.com/huggingface/transformers/tree/main/examples/pytorch/text-classification) as an example.
@@ -82,5 +78,5 @@ You need to have a training pipeline for your model for this homework. You can t

## Criteria:

- 6 PRs merged
- 6 PRs merged.
- Testing plan in the google doc.
109 changes: 31 additions & 78 deletions module-3/README.md
@@ -2,112 +2,70 @@

![alt text](./../docs/experiments.jpg)

# Practice
# Practice

[Practice task](./PRACTICE.md)

***
***

# Reference implementation

***
***


# Project structure
## Project structure

- [Python project](https://github.com/navdeep-G/samplemod.git)
- [ML project](https://github.com/ashleve/lightning-hydra-template.git)
- [Advanced features](https://github.com/Lightning-AI/lightning)

# Configuration
## Styling

[hydra](https://hydra.cc/docs/intro/)
[ruff](https://github.com/astral-sh/ruff)


# Example ML model with testing
## Configuration

[nlp-sample](./nlp-sample)
[hydra](https://hydra.cc/docs/intro/)

# Experiments
## Experiments management

https://neptune.ai/blog/best-ml-experiment-tracking-tools

## AIM

https://github.com/aimhubio/aim


```
kubectl create -f aim/deployment-aim-web.yaml
kubectl port-forward svc/my-aim-service 8080:80 --namespace default
```


# Model card

- https://github.com/ivylee/model-cards-and-datasheets
- https://arxiv.org/abs/1810.03993


# LLMs for everything


## LoRA & Peft
## Model card

- https://www.anyscale.com/blog/fine-tuning-llms-lora-or-full-parameter-an-in-depth-analysis-with-llama-2
- https://github.com/huggingface/peft
- [Model Cards for Model Reporting](https://arxiv.org/abs/1810.03993)
- [A collection of machine learning model cards and datasheets.](https://github.com/ivylee/model-cards-and-datasheets)
- [GPT-4o](https://openai.com/index/hello-gpt-4o/)
- [GPT-4 System Card](https://cdn.openai.com/papers/gpt-4-system-card.pdf)

## Experiments
## Classic example: BERT-based training

- https://github.com/georgian-io/LLM-Finetuning-Hub
- https://medium.com/georgian-impact-blog/the-practical-guide-to-llms-llama-2-cdf21d540ce3

## Run example

```
python lora_training/mistral_classification.py training-llm --pretrained-ckpt mistralai/Mistral-7B-v0.1 --epochs 1 --train-sample-fraction 0.3
python lora_training/mistral_classification.py training-llm --pretrained-ckpt facebook/opt-350m --epochs 1 --train-sample-fraction 0.3
python lora_training/mistral_classification.py inference-llm
```


https://github.com/brevdev/notebooks/blob/main/mistral-finetune-own-data.ipynb

## Run example RLHF


```
docker build -t rlhf:latest .
docker run --net=host --gpus all -it -v ${PWD}:/main rlhf:latest /bin/bash
accelerate config
python sft_llama2.py
```
[nlp-sample](./nlp-sample)

https://github.com/huggingface/trl/tree/main/examples/research_projects/stack_llama_2/scripts
https://huggingface.co/models?sort=downloads

## Modern example: GenAI-based training

## Eval:
TODO

- https://github.com/explodinggradients/ragas
- https://github.com/NVIDIA/NeMo-Guardrails
- https://github.com/guardrail-ml/guardrail
- https://github.com/promptfoo/promptfoo
- https://github.com/confident-ai/deepeval
- https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/tree/main
- https://github.com/microsoft/Phi-3CookBook
- https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard


## LLM API testing

```
pip install nemoguardrails
pip install openai
export OPENAI_API_KEY=**********
```

- [deepeval](https://github.com/confident-ai/deepeval)
- [promptfoo](https://github.com/promptfoo/promptfoo)
- [LLM Testing in 2024: Top Methods and Strategies](https://www.confident-ai.com/blog/llm-testing-in-2024-top-methods-and-strategies)
- [uptrain](https://github.com/uptrain-ai/uptrain)
- [ragas](https://github.com/explodinggradients/ragas)
- [NeMo Guardrails](https://github.com/NVIDIA/NeMo-Guardrails)
- [Automated Unit Test Improvement using Large Language Models at Meta](https://arxiv.org/abs/2402.09171)


# Distributed training
# Distributed training

- https://www.anyscale.com/blog/what-is-distributed-training
- https://www.anyscale.com/blog/training-175b-parameter-language-models-at-1000-gpu-scale-with-alpa-and-ray
@@ -119,8 +77,3 @@ export OPENAI_API_KEY=**********

- https://github.com/microsoft/nni
- https://github.com/autogluon/autogluon


# Declarative ML

https://predibase.com/blog/how-to-fine-tune-llama-2-on-your-data-with-scalable-llm-infrastructure
17 changes: 0 additions & 17 deletions module-3/aim/Dockerfile

This file was deleted.

40 changes: 0 additions & 40 deletions module-3/aim/deployment-aim-web.yaml

This file was deleted.

