From 6138439df4212951409e77cdf028a42b9492938c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Fri, 1 Nov 2024 00:26:53 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=93=20Specify=20and=20test=20min=20ver?= =?UTF-8?q?sions=20(#2303)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add conditional check for LLMBlender availability in test_judges.py * Fix import issues and update test requirements * Remove unused imports * Add require_peft decorator to test cases * Fix import_utils module to use correct package name for llm_blender * Found min version and test * Update Slack notification titles * Update dependencies versions * Update GitHub Actions workflow to include setup.py and reorder file paths * Revert "Update Slack notification titles" This reverts commit be02a7f2de87905e86a847540770968d0416934a. * Update Slack notification titles * Remove pull_request branch restriction in tests.yml * add check code quality back * Fix PairRMJudge model loading issue --- .github/workflows/tests.yml | 60 +++++++++++++++++++++++++++++++++---- setup.py | 4 +-- tests/test_judges.py | 23 +++++++------- trl/trainer/ppo_trainer.py | 2 +- 4 files changed, 70 insertions(+), 19 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ffa634ad15..c2fb576a48 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,20 +4,36 @@ on: push: branches: [ main ] pull_request: - branches: [ main ] paths: # Run only when relevant files are modified - - "trl/**.py" + - ".github/**.yml" - "examples/**.py" - "scripts/**.py" - - ".github/**.yml" - "tests/**.py" + - "trl/**.py" + - "setup.py" env: TQDM_DISABLE: 1 CI_SLACK_CHANNEL: ${{ secrets.CI_PUSH_MAIN_CHANNEL }} jobs: + check_code_quality: + name: Check code quality + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: 3.12 + - uses: pre-commit/action@v3.0.1 + with: + extra_args: --all-files + tests: name: Tests strategy: @@ -49,7 +65,7 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ env.CI_SLACK_CHANNEL }} - title: 🤗 Results of the TRL CI with dev dependencies + title: Results with ${{ matrix.python-version }} on ${{ matrix.os }} with lastest dependencies status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} @@ -81,7 +97,7 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ env.CI_SLACK_CHANNEL }} - title: 🤗 Results of the TRL CI with dev dependencies + title: Results with ${{ matrix.python-version }} on ${{ matrix.os }} with dev dependencies status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} @@ -110,6 +126,38 @@ jobs: uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ env.CI_SLACK_CHANNEL }} - title: 🤗 Results of the TRL CI with dev dependencies + title: Results with ${{ matrix.python-version }} on ${{ matrix.os }} without optional dependencies status: ${{ job.status }} slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + + tests_min_versions: + name: Tests with minimum versions + runs-on: 'ubuntu-latest' + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: "pip" + cache-dependency-path: | + setup.py + requirements.txt + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install accelerate==0.34.0 + python -m pip install datasets==2.21.0 + python -m pip install transformers==4.46.0 + python -m pip install ".[dev]" + - name: Test with pytest + run: | + make test + - name: Post to Slack + if: github.ref == 'refs/heads/main' && always() # Check if the branch is main + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: ${{ env.CI_SLACK_CHANNEL }} + title: Results with ${{ matrix.python-version }} on ${{ matrix.os }} with minimum versions + status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} \ No newline at end of file diff --git a/setup.py b/setup.py index ee58373ed1..781b036cfe 100644 --- a/setup.py +++ b/setup.py @@ -76,8 +76,8 @@ __version__ = "0.12.0.dev0" # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) REQUIRED_PKGS = [ - "accelerate", - "datasets", + "accelerate>=0.34.0", + "datasets>=2.21.0", "rich", # rich shouldn't be a required package for trl, we should remove it from here "transformers>=4.46.0", ] diff --git a/tests/test_judges.py b/tests/test_judges.py index 1fe65f018a..748cc85666 100644 --- a/tests/test_judges.py +++ b/tests/test_judges.py @@ -12,21 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import time import unittest -from trl import HfPairwiseJudge, PairRMJudge, RandomPairwiseJudge, RandomRankJudge, is_llm_blender_available +from trl import HfPairwiseJudge, PairRMJudge, RandomPairwiseJudge, RandomRankJudge from .testing_utils import require_llm_blender class TestJudges(unittest.TestCase): - @classmethod - def setUpClass(cls): - # Initialize once to download the model. This ensures it’s downloaded before running tests, preventing issues - # where concurrent tests attempt to load the model while it’s still downloading. - if is_llm_blender_available(): - PairRMJudge() - def _get_prompts_and_completions(self): prompts = ["The capital of France is", "The biggest planet in the solar system is"] completions = [["Paris", "Marseille"], ["Saturn", "Jupiter"]] @@ -56,9 +50,18 @@ def test_hugging_face_judge(self): self.assertTrue(all(isinstance(rank, int) for rank in ranks)) self.assertEqual(ranks, [0, 1]) + def load_pair_rm_judge(self): + # When using concurrent tests, PairRM may fail to load the model while another job is still downloading. + # This is a workaround to retry loading the model a few times. + for _ in range(5): + try: + return PairRMJudge() + except ValueError: + time.sleep(5) + @require_llm_blender def test_pair_rm_judge(self): - judge = PairRMJudge() + judge = self.load_pair_rm_judge() prompts, completions = self._get_prompts_and_completions() ranks = judge.judge(prompts=prompts, completions=completions) self.assertEqual(len(ranks), 2) @@ -67,7 +70,7 @@ def test_pair_rm_judge(self): @require_llm_blender def test_pair_rm_judge_return_scores(self): - judge = PairRMJudge() + judge = self.load_pair_rm_judge() prompts, completions = self._get_prompts_and_completions() probs = judge.judge(prompts=prompts, completions=completions, return_scores=True) self.assertEqual(len(probs), 2) diff --git a/trl/trainer/ppo_trainer.py b/trl/trainer/ppo_trainer.py index 5b3b6f05f5..872f83c135 100644 --- a/trl/trainer/ppo_trainer.py +++ b/trl/trainer/ppo_trainer.py @@ -131,7 +131,7 @@ def __init__( self.data_collator = data_collator self.eval_dataset = eval_dataset self.optimizer, self.lr_scheduler = optimizers - self.optimizer_cls_and_kwargs = None # needed for transformers >= 4.47 + self.optimizer_cls_and_kwargs = None # needed for transformers >= 4.47 ######### # calculate various batch sizes