davidADSP · zorgluf · Mar 28, 2024 · Mar 28, 2024 · Apr 2, 2024 · Apr 4, 2024
diff --git a/.github/publish-image.yaml b/.github/publish-image.yaml
@@ -0,0 +1,27 @@
+# Builds the standalone frouge play image and pushes it to Docker Hub on
+# every push to the branches listed below.
+# NOTE(review): GitHub Actions only loads workflow files from
+# .github/workflows/ — confirm this file's location if the job never runs.
+name: Publish frouge image
+on:
+  push:
+    branches:
+      - 'main'
+      - 'multiple-upgrades'
+
+jobs:
+  publish-image:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout
+        uses: actions/checkout@v3
+      - name: build
+        run: |
+          docker build . -t zorgluf/simple-play-frouge:latest -f app/Dockerfile_play_frouge
+      - name: publish
+        env:
+          DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }}
+        run: |
+          # Feed the token on stdin so it never appears in the process argument list.
+          echo "$DOCKER_HUB_TOKEN" | docker login -u zorgluf --password-stdin
+          docker push zorgluf/simple-play-frouge:latest
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 private
 archive
+venv
 
 .DS_Store
 # Documentatino notebooks since they're copied over when the build_docs script

diff --git a/README.md b/README.md
@@ -106,6 +106,10 @@ Install [Docker](https://github.com/davidADSP/SIMPLE/issues) and [Docker Compose
    bash ./scripts/install_env.sh sushigo
    ```
 
+Build the standalone Docker play image:
+  ```sh
+  docker build . -t simple-play-frouge -f app/Dockerfile_play_frouge
+  ```
 ---
 <!-- TUTORIAL -->
 ## Tutorial
@@ -127,6 +131,11 @@ For example, try the following command to play against a baseline random model i
    docker-compose exec app python3 test.py -d -g 1 -a base base human -e sushigo 
    ```
 
+To start the web GUI mode:
+  ```sh
+   docker-compose exec app python3 test.py -rm human_web -g 1 -a base base human base base -e frouge 
+   ```
+
 #### `train.py` 
 
 This entrypoint allows you to start training the AI using selfplay PPO. The underlying PPO engine is from the [Stable Baselines](https://stable-baselines.readthedocs.io/en/master/) package.

diff --git a/app/Dockerfile b/app/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:bionic-20200219 as base
+FROM ubuntu:jammy AS base
 
 RUN apt-get update
 RUN apt-get -y install ssh

diff --git a/app/Dockerfile_play_frouge b/app/Dockerfile_play_frouge
@@ -0,0 +1,30 @@
+# Standalone image that starts test.py in human_web mode for the frouge environment.
+# Build from the repository root:
+#   docker build . -t simple-play-frouge -f app/Dockerfile_play_frouge
+FROM python:3.10
+
+# Documentation only (presumably the port the human_web GUI listens on — confirm);
+# publish it at run time with `docker run -p`.
+EXPOSE 8080
+
+# Everything below runs as an unprivileged user. pip is expected to fall back to a
+# per-user install under ~/.local, hence the PATH entry for its bin directory.
+RUN useradd -ms /bin/bash selfplay
+USER selfplay
+ENV PATH="/home/selfplay/.local/bin:${PATH}"
+WORKDIR /app
+
+COPY --chown=selfplay:selfplay ./app .
+# NOTE(review): WORKDIR is /app and the COPY above places the contents of
+# ./app directly in it, so this path resolves to /app/app/zoo and most likely
+# removes nothing. Even with the path corrected, a later-layer rm would not
+# shrink the image; a .dockerignore entry is the usual way to exclude files.
+RUN rm -rf ./app/zoo
+COPY --chown=selfplay:selfplay ./app/zoo/pretrained ./zoo/pretrained
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.python.org/simple
+RUN pip install --no-cache-dir -e ./environments/frouge
+
+# Exec form so python3 runs as PID 1 and receives SIGTERM from `docker stop`.
+CMD ["python3", "test.py", "-rm", "human_web", "-g", "1", "-a", "best_model", "best_model", "human", "best_model", "best_model", "-e", "frouge"]
+#CMD bash
diff --git a/app/environments/frouge/frouge/__init__.py b/app/environments/frouge/frouge/__init__.py
@@ -1,7 +1,10 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register
 
 register(
     id='FlammeRouge-v0',
     entry_point='frouge.envs:FlammeRougeEnv',
 )
 
+# optimal training :
+#docker-compose exec app python3 train.py -r -e frouge -t 300 -os 12800 -ob 256 -dev cuda
+
diff --git a/app/environments/frouge/frouge/envs/classes.py b/app/environments/frouge/frouge/envs/classes.py
@@ -210,6 +210,17 @@ def map_to_board(self,board=None):
         a.array[self.r_position.col][self.r_position.row][0] = str(self.n) + "r"
         a.array[self.s_position.col][self.s_position.row][0] = str(self.n) + "s"
         return a
+
+    def nb_penalties(self):
+        """Total penalty cards over the r_/s_ deck, discard and hand piles."""
+        piles = (
+            self.r_deck, self.s_deck,
+            self.r_discard, self.s_discard,
+            self.r_hand, self.s_hand,
+        )
+        # Each pile reports its own count through its nb_penalties() method.
+        total = sum(pile.nb_penalties() for pile in piles)
+        return total
 
 
 class Position():
@@ -269,6 +280,15 @@ def array(self):
         for card in self.cards:
             array[ALL_CARDS.index(card)] += 0.1
         return array
+
+    def sum_values(self):
+        """Return the sum of ``value`` over every card in ``self.cards``."""
+        # sum() starts from 0, so an empty card list yields 0.
+        values = (card.value for card in self.cards)
+        return sum(values)
+
+    def nb_penalties(self):  # how many held cards have "penalty" in their name
+        return sum(1 for card in self.cards if "penalty" in card.name)
 
 class Card():
     def __init__(self, name, value):
@@ -378,10 +398,10 @@ def move(self,player_id,c_type,n,aspiration=False):
             if start_cell == CD:
                 n = max(n,5)
             if start_cell == CSU:
-                n = max(n,5)
+                n = max(n,4)
             if start_cell == CC:
                 n = min(n,5)
-            if self.get_cell(player.c_pos(c_type).col+n,0) == CC:
+            if self.get_cell(player.c_pos(c_type).col+n,0) == CC or self.get_cell(player.c_pos(c_type).col+ int(n/2),0) == CC:
                 if n > 5:
                     n = 5
         self.set_cycl_to_pos(player_id, c_type, player.c_pos(c_type).col + n)