diff --git a/dizoo/gym_anytrading/envs/README.md b/dizoo/gym_anytrading/envs/README.md index 699f78b2c1..0be3fe219b 100644 --- a/dizoo/gym_anytrading/envs/README.md +++ b/dizoo/gym_anytrading/envs/README.md @@ -59,7 +59,7 @@ If profit or loss occurs, it means that one of the following two cycles in state According to the above definition, we can easily know that the formula of accumulative profit is: -$\prod_{buying\ long}(r_{curr}/r_{pre}\ *\ cost) + \prod_{short\ selling}((2-r_{curr}/r_{pre})\ *\ cost)$ +$\prod_{buying\ long}(r_{curr}/r_{pre}\ *\ cost) * \prod_{short\ selling}((2-r_{curr}/r_{pre})\ *\ cost)$ ### Reward Function diff --git a/dizoo/gym_anytrading/envs/statemachine.png b/dizoo/gym_anytrading/envs/statemachine.png index 2c355939f1..bbc3d39d89 100644 Binary files a/dizoo/gym_anytrading/envs/statemachine.png and b/dizoo/gym_anytrading/envs/statemachine.png differ diff --git a/dizoo/gym_anytrading/envs/stocks_env.py b/dizoo/gym_anytrading/envs/stocks_env.py index a71ae3f59b..8d34caa182 100644 --- a/dizoo/gym_anytrading/envs/stocks_env.py +++ b/dizoo/gym_anytrading/envs/stocks_env.py @@ -69,7 +69,7 @@ def _process_data(self, start_idx: int = None) -> Any: # validate index if start_idx is None: if self.train_range == None or self.test_range == None: - self.start_idx = np.random.randint(self.window_size, len(self.df) - self._cfg.eps_length) + self.start_idx = np.random.randint(self.window_size - 1, len(self.df) - self._cfg.eps_length) elif self._env_id[-1] == 'e': boundary = int(len(self.df) * (1 + self.test_range)) assert len(self.df) - self._cfg.eps_length > boundary + self.window_size,\