-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Joel Lamy-Poirier <[email protected]>
- Loading branch information
1 parent
7989595
commit 2905d38
Showing
20 changed files
with
496 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,39 @@ | ||
# syntax=docker/dockerfile:1.7-labs | ||
FROM nvcr.io/nvidia/pytorch:24.07-py3 | ||
|
||
# Install git-lfs for Huggingface hub interaction and sudo for system adjustments | ||
# Install dependencies. | ||
RUN apt-get update \ | ||
&& apt-get install --no-install-recommends -y git-lfs sudo util-linux \ | ||
&& apt-get install --no-install-recommends -y acl git-lfs \ | ||
&& rm -rf /var/lib/apt/lists/* \ | ||
&& git lfs install | ||
|
||
# Add a user for Fast-LLM with sudo privileges for runtime adjustments | ||
ARG FAST_LLM_USER_ID=1000 | ||
RUN useradd -m -u $FAST_LLM_USER_ID -s /bin/bash fast_llm \ | ||
&& echo 'fast_llm ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers | ||
|
||
USER fast_llm | ||
# Set the working directory. | ||
WORKDIR /app | ||
# Set the permission to 777 for all files and directories in `/app`, `/home` and python install directories: | ||
# 1. Create directories explicitly because docker use the wrong permission for explicit creation. | ||
# 2. For the rest, set the default ACL to 777 for all users. | ||
RUN mkdir -m 777 /app/Megatron-LM /app/examples /app/fast_llm /app/tests /app/tools \ | ||
&& setfacl -m d:u::rwx,d:g::rwx,d:o::rwx,u::rwx,g::rwx,o::rwx \ | ||
/app \ | ||
/home \ | ||
/usr \ | ||
/usr/local \ | ||
/usr/local/bin \ | ||
/usr/local/lib \ | ||
/usr/local/lib/python3.10 \ | ||
/usr/local/lib/python3.10/dist-packages \ | ||
/usr/local/lib/python3.10/dist-packages/__pycache__ | ||
|
||
# Environment settings for Python and PATH | ||
ENV PYTHONPATH=/app:/app/Megatron-LM \ | ||
PATH=$PATH:/home/fast_llm/.local/bin/ | ||
|
||
# Copy the dependency files and install dependencies | ||
COPY --chown=fast_llm setup.py setup.cfg pyproject.toml ./ | ||
COPY --chown=fast_llm ./fast_llm/csrc/ fast_llm/csrc/ | ||
RUN PIP_NO_INPUT=1 pip3 install --no-cache-dir --no-build-isolation -e ".[CORE,OPTIONAL,DEV]" | ||
# Copy dependency files with universal write permissions for all users. | ||
COPY --chmod=777 setup.py setup.cfg pyproject.toml ./ | ||
COPY --chmod=777 ./fast_llm/csrc/ fast_llm/csrc/ | ||
|
||
# Copy the rest of the code | ||
COPY --chown=fast_llm ./Megatron-LM Megatron-LM | ||
COPY --chown=fast_llm ./examples examples | ||
COPY --chown=fast_llm ./tests tests | ||
COPY --chown=fast_llm ./tools tools | ||
# Install dependencies within the virtual environment. | ||
RUN pip install --no-cache-dir --no-build-isolation -e ".[CORE,OPTIONAL,DEV]" | ||
|
||
# Copy the main source code for Fast-LLM | ||
COPY --exclude=./fast_llm/csrc/ --chown=fast_llm ./fast_llm/ fast_llm/ | ||
# Copy the remaining source code with universal write permissions. | ||
COPY --chmod=777 ./Megatron-LM Megatron-LM | ||
COPY --chmod=777 ./examples examples | ||
COPY --chmod=777 ./tests tests | ||
COPY --chmod=777 ./tools tools | ||
COPY --chmod=777 --exclude=./fast_llm/csrc/ ./fast_llm/ fast_llm/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from fast_llm.data.preparator.gpt_memmap.config import GPTMemmapDatasetPreparatorConfig | ||
from fast_llm.utils import Registry | ||
|
||
dataset_preparator_registry = Registry( | ||
"DatasetPreparator", | ||
{ | ||
dataset_preparator.preparator_name: dataset_preparator | ||
for dataset_preparator in [ | ||
GPTMemmapDatasetPreparatorConfig, | ||
] | ||
}, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import abc | ||
import argparse | ||
import typing | ||
|
||
from fast_llm.config import config_class | ||
from fast_llm.engine.config_utils.runnable import RunnableConfig | ||
from fast_llm.utils import Assert | ||
|
||
|
||
@config_class() | ||
class DatasetPreparatorConfig(RunnableConfig): | ||
preparator_name: typing.ClassVar[str] | ||
|
||
@classmethod | ||
def get_dataset_preparator_class(cls) -> type["DatasetPreparator"]: | ||
raise NotImplementedError | ||
|
||
def _get_runnable(self, parsed: argparse.Namespace) -> typing.Callable[[], None]: | ||
dataset_preparator = self.get_dataset_preparator_class()(config=self) | ||
return dataset_preparator.run | ||
|
||
|
||
class DatasetPreparator(abc.ABC): | ||
_config: DatasetPreparatorConfig | ||
config_class: typing.ClassVar[type[DatasetPreparatorConfig]] = DatasetPreparatorConfig | ||
|
||
def __init__(self, config: DatasetPreparatorConfig) -> None: | ||
Assert.custom(isinstance, config, self.config_class) | ||
config.validate() | ||
self._config = config | ||
|
||
@abc.abstractmethod | ||
def run(self) -> None: | ||
raise NotImplementedError |
Empty file.
Oops, something went wrong.