diff --git a/dizoo/gym_anytrading/envs/README.md b/dizoo/gym_anytrading/envs/README.md
index 699f78b2c1..0be3fe219b 100644
--- a/dizoo/gym_anytrading/envs/README.md
+++ b/dizoo/gym_anytrading/envs/README.md
@@ -59,7 +59,7 @@ If profit or loss occurs, it means that one of the following two cycles in state
 
 According to the above definition, we can easily know that the formula of accumulative profit is: 
 
-$\prod_{buying\ long}(r_{curr}/r_{pre}\ *\ cost) + \prod_{short\ selling}((2-r_{curr}/r_{pre})\ *\ cost)$
+$\prod_{buying\ long}(r_{curr}/r_{pre} \times cost) \times \prod_{short\ selling}((2-r_{curr}/r_{pre}) \times cost)$
 
 
 ### Reward Function
diff --git a/dizoo/gym_anytrading/envs/statemachine.png b/dizoo/gym_anytrading/envs/statemachine.png
index 2c355939f1..bbc3d39d89 100644
Binary files a/dizoo/gym_anytrading/envs/statemachine.png and b/dizoo/gym_anytrading/envs/statemachine.png differ
diff --git a/dizoo/gym_anytrading/envs/stocks_env.py b/dizoo/gym_anytrading/envs/stocks_env.py
index a71ae3f59b..8d34caa182 100644
--- a/dizoo/gym_anytrading/envs/stocks_env.py
+++ b/dizoo/gym_anytrading/envs/stocks_env.py
@@ -69,7 +69,7 @@ def _process_data(self, start_idx: int = None) -> Any:
         # validate index
         if start_idx is None:
             if self.train_range == None or self.test_range == None:
-                self.start_idx = np.random.randint(self.window_size, len(self.df) - self._cfg.eps_length)
+                self.start_idx = np.random.randint(self.window_size - 1, len(self.df) - self._cfg.eps_length)
             elif self._env_id[-1] == 'e':
                 boundary = int(len(self.df) * (1 + self.test_range))
                 assert len(self.df) - self._cfg.eps_length > boundary + self.window_size,\