Commit dd0ecc7: "wip"

Mistobaan committed Mar 5, 2023
1 parent 21e4943
Showing 57 changed files with 16,817 additions and 15,641 deletions.
10 changes: 10 additions & 0 deletions .coveragerc
@@ -0,0 +1,10 @@
[paths]
source = src

[run]
branch = true
source = src/apologies

[report]
show_missing = false
precision = 1
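The `.coveragerc` added above uses standard INI syntax, so its effect can be sketched by parsing the same settings with Python's `configparser` (the file contents are reproduced inline purely for illustration; coverage.py reads the file itself):

```python
import configparser

# The same settings as the .coveragerc above, inlined for the demo.
text = """
[paths]
source = src

[run]
branch = true
source = src/apologies

[report]
show_missing = false
precision = 1
"""

config = configparser.ConfigParser()
config.read_string(text)

# [run] branch = true turns on branch coverage; [run] source restricts
# measurement to the package under src/apologies.
assert config.getboolean("run", "branch") is True
assert config.get("run", "source") == "src/apologies"
# [report] precision = 1 reports percentages with one decimal place.
assert config.getint("report", "precision") == 1
```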
11 changes: 11 additions & 0 deletions Makefile
@@ -0,0 +1,11 @@
gendoc:
	docker build -t trlxgendocs -f docker/docs/Dockerfile .
run:
	docker run --rm -it \
		-p 8000:8000 \
		--entrypoint python trlxgendocs -m http.server 8000 --directory build/html

sh:
	docker run --rm -it -v ${PWD}:/build \
		-p 8000:8000 \
		--entrypoint /bin/bash trlxgendocs
8 changes: 4 additions & 4 deletions README.md
@@ -1,12 +1,12 @@
# Transformer Reinforcement Learning X

![TRLX](./docs/_static/apple-touch-icon-114x114.png)

-[docs-image]: https://readthedocs.org/projects/trlX/badge/?version=latest
-[docs-url]: https://trlX.readthedocs.io/en/latest/?badge=latest
+[!docs-image](https://readthedocs.org/projects/trlX/badge/?version=latest)
+[!docs-url](https://trlX.readthedocs.io/en/latest/?badge=latest)

[![DOI](https://zenodo.org/badge/545104023.svg)](https://zenodo.org/badge/latestdoi/545104023)

# Transformer Reinforcement Learning X

trlX is a distributed training framework designed from the ground up to focus on fine-tuning large language models with reinforcement learning using either a provided reward function or a reward-labeled dataset.

Training support for 🤗 Hugging Face models is provided by [Accelerate](https://huggingface.co/docs/accelerate/)-backed trainers, allowing users to fine-tune causal and T5-based language models of up to 20B parameters, such as `facebook/opt-6.7b`, `EleutherAI/gpt-neox-20b`, and `google/flan-t5-xxl`. For models beyond 20B parameters, trlX provides [NVIDIA NeMo](https://github.com/NVIDIA/NeMo)-backed trainers that leverage efficient parallelism techniques to scale effectively.
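The trainers described above consume a user-provided reward function. Its shape can be sketched as a plain callable with the `(samples, prompts, outputs)` signature that appears later in this commit's `examples/hh/ppo_hh.py` hunk; the length-based scoring below is a toy stand-in, not trlX's API:

```python
# Toy sketch of the reward-function interface a trlX trainer consumes:
# a callable over generated samples that returns one scalar per sample.
# The scoring rule is illustrative only; real examples use a reward model.
from typing import List

def reward_fn(samples: List[str], prompts: List[str], outputs: List[str]) -> List[float]:
    rewards = []
    for prompt, output in zip(prompts, outputs):
        # Reward longer continuations, lightly penalizing echoed prompts.
        score = float(len(output.split()))
        if prompt in output:
            score -= 1.0
        rewards.append(score)
    return rewards

scores = reward_fn(
    samples=["Q: hi A: hello there", "Q: bye A: ok"],
    prompts=["Q: hi A:", "Q: bye A:"],
    outputs=["hello there", "ok"],
)
print(scores)  # [2.0, 1.0]
```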
17 changes: 17 additions & 0 deletions docker/docs/Dockerfile
@@ -0,0 +1,17 @@
FROM python:3.8-slim

# pip install -r docs/requirements.txt
# sphinx-build -b html docs docs/build/html -j auto
# sphinx-build -b html -D nb_execution_mode=off docs docs/build/html -j auto

RUN python -m pip install --upgrade --no-cache-dir pip
ADD docs/requirements.txt /tmp/requirements.txt
RUN python -m pip install --exists-action=w --no-cache-dir -r /tmp/requirements.txt
RUN apt-get update && apt-get install make -y --no-install-recommends \
git \
&& rm -rf /var/lib/apt/lists/*
RUN mkdir /build
WORKDIR /build/
ADD docs .
RUN make html
ENTRYPOINT [ "python", "-m", "http.server", "8000" ]
4 changes: 3 additions & 1 deletion docs/Makefile
@@ -14,7 +14,9 @@ help:

.PHONY: help Makefile

+prepare:
+	cp ../examples/notebooks/*.ipynb .
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
+%: Makefile prepare
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
2 changes: 1 addition & 1 deletion docs/README.md
@@ -1,5 +1,5 @@
# How To build the documentation

```bash
-make -c docs html
+make html
```
9 changes: 3 additions & 6 deletions docs/_static/style.css
@@ -1,7 +1,5 @@
@import url("theme.css");

:root {
-  --block-bg-opacity: .5;
+  --block-bg-opacity: 0.5;
}

.wy-side-nav-search {
@@ -20,7 +18,6 @@
background-color: rgba(171, 0, 182, var(--block-bg-opacity));
}

-.key-ideas
-{
-  border: 0px
+.key-ideas {
+  border: 0px;
}
14 changes: 9 additions & 5 deletions docs/index.rst
@@ -16,17 +16,21 @@ Installation
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
    :caption: Contents:

    index
    data
    models
-   orchestrator
    configs
    pipeline
    trainer
    examples

+.. toctree::
+   :hidden:
+   :maxdepth: 1
+   :caption: Examples
+
+   trlx_simulacra.ipynb
+   trlx_sentiments.ipynb

.. toctree::
:hidden:
1 change: 0 additions & 1 deletion docs/models.md

This file was deleted.

23 changes: 0 additions & 23 deletions docs/orchestrator.rst

This file was deleted.

23 changes: 13 additions & 10 deletions docs/pipeline.md → docs/pipeline.rst
@@ -1,27 +1,30 @@
-# Pipelines and Rollout Store
+.. _pipeline:

-## Pipelines
+Pipelines and Rollout Store
+***************************

-Pipelines in trlX provide a way to read from a dataset. They are used to fetch data from the dataset and feed it to the models for training or inference. The pipelines allow for efficient processing of the data and ensure that the models have access to the data they need for their tasks.

-## Rollout Stores
+*Pipelines*

-Rollout stores in trlX are used to store experiences created for the models by the orchestrator. The experiences in the rollout stores serve as the training data for the models. The models use the experiences stored in their rollout stores to learn and improve their behavior. The rollout stores provide a convenient and efficient way for the models to access the experiences they need for training.

-## General
+Pipelines in trlX provide a way to read from a dataset. They are used to fetch data from the dataset and feed it to the models for training or inference. The pipelines allow for efficient processing of the data and ensure that the models have access to the data they need for their tasks.

 .. autoclass:: trlx.pipeline.BasePipeline
    :members:

 .. autoclass:: trlx.pipeline.BaseRolloutStore
    :members:

-## PPO
+*Rollout Stores*

+Rollout stores in trlX are used to store experiences created for the models by the orchestrator. The experiences in the rollout stores serve as the training data for the models. The models use the experiences stored in their rollout stores to learn and improve their behavior. The rollout stores provide a convenient and efficient way for the models to access the experiences they need for training.

+**PPO**

 .. autoclass:: trlx.pipeline.ppo_pipeline.PPORolloutStorage
    :members:

-## ILQL
+**ILQL**

 .. autoclass:: trlx.pipeline.offline_pipeline.PromptPipeline
    :members:
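The rollout-store concept described in the rewritten docs above (the orchestrator pushes experiences, the trainer samples them for updates) can be sketched with a toy class; this mirrors the idea only, not the real `BaseRolloutStore` API:

```python
import random
from typing import Any, List

class ToyRolloutStore:
    """Minimal stand-in for the rollout-store concept: the orchestrator
    pushes experiences, the trainer samples minibatches for updates."""

    def __init__(self) -> None:
        self.history: List[Any] = []

    def push(self, experiences: List[Any]) -> None:
        # Orchestrator side: append newly collected experiences.
        self.history.extend(experiences)

    def sample(self, batch_size: int, seed: int = 0) -> List[Any]:
        # Trainer side: draw a random minibatch for one update step.
        rng = random.Random(seed)
        return rng.sample(self.history, min(batch_size, len(self.history)))

store = ToyRolloutStore()
store.push([{"query": "q1", "response": "r1", "reward": 0.5},
            {"query": "q2", "response": "r2", "reward": -0.1}])
batch = store.sample(batch_size=2)
print(len(batch))  # 2
```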
4 changes: 1 addition & 3 deletions examples/experiments/grounded_program_synthesis/lang.py
@@ -326,9 +326,7 @@ def sample_production(self, gen_length: int = 5):
init_flag = False
else:
random_chosen_function = random.choice(self.production_idt)
-generated_function = self.production_gen_list[random_chosen_function](
-    hash_functions[-1]["function_template"]
-)
+generated_function = self.production_gen_list[random_chosen_function](hash_functions[-1]["function_template"])
if generated_function["output"] == "ERROR":
break
hash_functions.append(generated_function)
4 changes: 1 addition & 3 deletions examples/hh/ppo_hh.py
@@ -168,9 +168,7 @@ def forward(self, input_ids):

def reward_fn(samples, prompts, outputs):
samples = [s + reward_tokenizer.eos_token for s in samples]
-input = reward_tokenizer(samples, padding=True, truncation=True, max_length=1024, return_tensors="pt").to(
-    device
-)
+input = reward_tokenizer(samples, padding=True, truncation=True, max_length=1024, return_tensors="pt").to(device)

mbs = 24
out = []
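The `mbs = 24` in the hunk above micro-batches the reward-model calls so peak memory stays bounded. The batching pattern itself can be sketched in isolation, with a toy scorer standing in for the reward model's forward pass:

```python
# Sketch of the micro-batching pattern around the reward-model call above:
# score samples in chunks of `mbs`, then concatenate the chunk results.
from typing import Callable, List

def batched_scores(samples: List[str],
                   score: Callable[[List[str]], List[float]],
                   mbs: int = 24) -> List[float]:
    out: List[float] = []
    for i in range(0, len(samples), mbs):
        # One "forward pass" per chunk of at most `mbs` samples.
        out.extend(score(samples[i : i + mbs]))
    return out

# Toy scorer standing in for the real reward model.
toy_score = lambda chunk: [float(len(s)) for s in chunk]

scores = batched_scores([str(i) * i for i in range(1, 50)], toy_score, mbs=24)
print(len(scores))  # 49
```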
8 changes: 2 additions & 6 deletions examples/nemo_ilql_inference.py
@@ -2,9 +2,7 @@
import sys
from glob import glob

-from nemo.collections.nlp.modules.common.megatron.megatron_init import (
-    fake_initialize_model_parallel,
-)
+from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel
from nemo.utils.app_state import AppState
from nemo.utils.model_utils import inject_model_parallel_rank
from omegaconf.omegaconf import OmegaConf
@@ -49,9 +47,7 @@ def main(megatron_cfg_path, checkpoint_path):

# Manually set up the TP and PP groups
app_state = AppState()
-app_state.model_parallel_size = (
-    megatron_cfg.model.tensor_model_parallel_size * megatron_cfg.model.pipeline_model_parallel_size
-)
+app_state.model_parallel_size = megatron_cfg.model.tensor_model_parallel_size * megatron_cfg.model.pipeline_model_parallel_size
app_state.tensor_model_parallel_size = megatron_cfg.model.tensor_model_parallel_size
app_state.pipeline_model_parallel_size = megatron_cfg.model.pipeline_model_parallel_size
(
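The hunk above computes the model-parallel group size as tensor-parallel size times pipeline-parallel size. The surrounding rank arithmetic can be illustrated with plain integers (the helper name and the data-parallel remainder are illustrative, not NeMo's API):

```python
# Illustration of the parallel-size arithmetic in the hunk above: the
# model-parallel group size is TP * PP, and data parallelism fills the
# remaining factor of the world size.
def parallel_sizes(world_size: int, tensor_mp: int, pipeline_mp: int):
    model_parallel_size = tensor_mp * pipeline_mp
    assert world_size % model_parallel_size == 0, "world size must divide evenly"
    data_parallel_size = world_size // model_parallel_size
    return model_parallel_size, data_parallel_size

# e.g. 16 GPUs with TP=4 and PP=2 leaves 2-way data parallelism.
mp, dp = parallel_sizes(world_size=16, tensor_mp=4, pipeline_mp=2)
print(mp, dp)  # 8 2
```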