Fix learning on cuda #201

Merged · 10 commits · Sep 25, 2023
Changes from 6 commits
5 changes: 3 additions & 2 deletions .gitignore
@@ -133,8 +133,9 @@ dmypy.json
.idea
.vscode
examples/inputs/learned_strategies
workshop/

examples/outputs
examples/local_db/
validation_runs
assume-db
forecasts_df.csv
forecasts_df.csv
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -12,4 +12,4 @@ build:
python: mambaforge-4.10

conda:
environment: environment_docs.yml
environment: environment_docs.yaml
4 changes: 2 additions & 2 deletions README.md
@@ -1,6 +1,6 @@
# ASSUME: Agent-Based Electricity Markets Simulation Toolbox

![Lint Status](https://github.com/assume-framework/assume/actions/workflows/lint-pytest.yml/badge.svg)
![Lint Status](https://github.com/assume-framework/assume/actions/workflows/lint-pytest.yaml/badge.svg)
[![Code Coverage](https://codecov.io/gh/assume-framework/assume/branch/main/graph/badge.svg?token=CZ4FO7P57H)](https://codecov.io/gh/assume-framework/assume)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8088760.svg)](https://doi.org/10.5281/zenodo.8088760)

@@ -114,7 +114,7 @@ pre-commit run --all-files
First, create an environment that includes the documentation dependencies:

```bash
conda env create -f environment_docs.yml
conda env create -f environment_docs.yaml
```

To generate or update the automatically created docs in `docs/source/assume*`, run:
6 changes: 3 additions & 3 deletions assume/common/base.py
@@ -685,8 +685,8 @@ class LearningConfig(TypedDict):
:type learning_rate: float
:param training_episodes: The number of training episodes.
:type training_episodes: int
:param episodes_collecting_initial_experience: The number of episodes collecting initial experience.
:type episodes_collecting_initial_experience: int
:param episodes_initial_experience: The number of episodes collecting initial experience.
:type episodes_initial_experience: int
:param train_freq: The training frequency.
:type train_freq: int
:param gradient_steps: The number of gradient steps.
@@ -716,7 +716,7 @@ class LearningConfig(TypedDict):
algorithm: str
learning_rate: float
training_episodes: int
episodes_collecting_initial_experience: int
episodes_initial_experience: int
train_freq: int
gradient_steps: int
batch_size: int
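For orientation, a minimal sketch of what a learning config with the renamed key could look like. Only the key names come from the hunks above; the values and the subset of keys shown are illustrative, not defaults defined by this PR:

```python
# Hedged sketch: LearningConfig usage after the rename; values are illustrative.
from assume.common.base import LearningConfig

learning_config: LearningConfig = {
    "algorithm": "matd3",
    "learning_rate": 1e-4,
    "training_episodes": 50,
    "episodes_initial_experience": 5,  # was episodes_collecting_initial_experience
    "train_freq": 1,
    "gradient_steps": 1,
    "batch_size": 1024,
}
```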
6 changes: 5 additions & 1 deletion assume/common/outputs.py
@@ -41,7 +41,7 @@ def __init__(
end: datetime,
db_engine=None,
export_csv_path: str = "",
save_frequency_hours: int = 24,
save_frequency_hours: int = None,
learning_mode: bool = False,
):
super().__init__()
@@ -202,7 +202,7 @@ async def store_dfs(self):
for table in self.write_dfs.keys():
if len(self.write_dfs[table]) == 0:
continue

# exclude all empty dataframes from self.write_dfs[table]
self.write_dfs[table] = [df for df in self.write_dfs[table] if not df.empty]
df = pd.concat(self.write_dfs[table], axis=0)

df.reset_index()
if df.empty:
continue
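As a standalone illustration of the filtering added above: dropping empty frames before `pd.concat` avoids dtype surprises and the warnings newer pandas versions emit when empty or all-NA frames are concatenated. The table name and data below are hypothetical.

```python
import pandas as pd

# Hypothetical buffers keyed by table name, mimicking self.write_dfs.
write_dfs = {
    "market_meta": [pd.DataFrame({"price": [42.0]}), pd.DataFrame()],
}

for table, frames in write_dfs.items():
    # keep only the non-empty frames, as the hunk above does for self.write_dfs[table]
    frames = [df for df in frames if not df.empty]
    if not frames:
        continue
    df = pd.concat(frames, axis=0)
    print(table, len(df))  # market_meta 1
```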
7 changes: 4 additions & 3 deletions assume/common/scenario_loader.py
@@ -248,7 +248,7 @@ async def load_scenario_folder_async(

# load the config file
path = f"{inputs_path}/{scenario}"
with open(f"{path}/config.yml", "r") as f:
with open(f"{path}/config.yaml", "r") as f:
config = yaml.safe_load(f)
if not study_case:
study_case = list(config.keys())[0]
@@ -559,6 +559,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
act_dim=world.learning_role.act_dim,
n_rl_units=len(world.learning_role.rl_strats),
device=world.learning_role.device,
float_type=world.learning_role.float_type,
)
actors_and_critics = None
world.output_role.del_similar_runs()
@@ -584,7 +585,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
world.learning_role.buffer = buffer
world.learning_role.episodes_done = episode

if episode + 1 >= world.learning_role.episodes_collecting_initial_experience:
if episode + 1 > world.learning_role.episodes_initial_experience:
world.learning_role.turn_off_initial_exploration()

world.run()
@@ -623,7 +624,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
# as long as we do not skip setup container should be handled correctly
# if enough initial experience was collected according to specifications in learning config
# turn off initial exploration and go into full learning mode
if episode + 1 >= world.learning_role.episodes_collecting_initial_experience:
if episode + 1 >= world.learning_role.episodes_initial_experience:
world.learning_role.turn_off_initial_exploration()

# container shutdown implicitly with new initialisation
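A sketch of the updated buffer construction in `run_learning`, assuming the class in `assume/reinforcement_learning/buffer.py` is named `ReplayBuffer`; the sizes are made up, and in the PR the real arguments come from `world.learning_role`.

```python
import torch as th

from assume.reinforcement_learning.buffer import ReplayBuffer  # assumed class name

device = th.device("cuda" if th.cuda.is_available() else "cpu")

buffer = ReplayBuffer(
    buffer_size=100_000,   # illustrative, not taken from the diff
    obs_dim=50,            # illustrative
    act_dim=2,             # illustrative
    n_rl_units=3,          # illustrative
    device=device,
    float_type=th.float,   # new argument passed since this PR
)
```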
38 changes: 24 additions & 14 deletions assume/common/units_operator.py
@@ -411,7 +411,9 @@
self,
start: datetime,
marketconfig: MarketConfig,
action_dimension: int,
obs_dim: int,
act_dim: int,
device: str,
learning_unit_count: int,
):
learning_role_id = "learning_agent"
@@ -422,27 +424,28 @@
try:
import torch as th

all_actions = th.zeros(
(learning_unit_count, action_dimension), device="cpu"
)
except ImportError:
logger.error("tried writing learning_params, but torch is not installed")
all_actions = np.zeros((learning_unit_count, action_dimension))
all_actions = np.zeros((learning_unit_count, act_dim))
return

Codecov / codecov/patch warning: added lines assume/common/units_operator.py#L429-L430 were not covered by tests.

all_observations = th.zeros((learning_unit_count, obs_dim), device=device)
all_actions = th.zeros((learning_unit_count, act_dim), device=device)

i = 0
for unit_id, unit in self.units.items():
for _, unit in self.units.items():
# rl only for energy market for now!
if isinstance(
unit.bidding_strategies.get(marketconfig.product_type),
LearningStrategy,
):
all_observations.append(
np.array(unit.outputs["rl_observations"][start])
)
all_observations[i, :] = unit.outputs["rl_observations"][start]
all_actions[i, :] = unit.outputs["rl_actions"][start]
all_rewards.append(unit.outputs["reward"][start])
i += 1

# convert all_actions list of tensor to numpy 2D array
all_observations = all_observations.squeeze().cpu().numpy()
all_actions = all_actions.squeeze().cpu().numpy()
all_rewards = np.array(all_rewards)
rl_agent_data = (np.array(all_observations), all_actions, all_rewards)
@@ -467,16 +470,18 @@
:type marketconfig: MarketConfig
"""
learning_strategies = []
action_dimension = 0

for unit in self.units.values():
bidding_strategy = unit.bidding_strategies.get(marketconfig.product_type)
if isinstance(bidding_strategy, LearningStrategy):
learning_strategies.append(bidding_strategy)
# should be the same across all strategies
action_dimension = bidding_strategy.act_dim
obs_dim = bidding_strategy.obs_dim
act_dim = bidding_strategy.act_dim
device = bidding_strategy.device

# should write learning results if at least one bidding_strategy is a learning strategy
write_learning_results = len(learning_strategies) > 0 and orderbook
if write_learning_results:
if learning_strategies and orderbook:
start = orderbook[0]["start_time"]
# write learning output
self.write_learning_to_output(start, marketconfig)
@@ -486,5 +491,10 @@
if learning_strategies[0].learning_mode:
# in learning mode we are sending data to learning
self.write_to_learning(
start, marketconfig, action_dimension, len(learning_strategies)
start=start,
marketconfig=marketconfig,
obs_dim=obs_dim,
act_dim=act_dim,
device=device,
learning_unit_count=len(learning_strategies),
)
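The refactor above swaps Python-list appends for tensors preallocated on the learning device, moving data to the CPU only once at the end. A self-contained sketch of that pattern, with made-up dimensions and random data standing in for `unit.outputs`:

```python
import numpy as np
import torch as th

learning_unit_count, obs_dim, act_dim = 3, 50, 2
device = th.device("cuda" if th.cuda.is_available() else "cpu")

# preallocate one row per learning unit instead of growing Python lists
all_observations = th.zeros((learning_unit_count, obs_dim), device=device)
all_actions = th.zeros((learning_unit_count, act_dim), device=device)

for i in range(learning_unit_count):
    # in the PR these rows come from unit.outputs["rl_observations"] / ["rl_actions"]
    all_observations[i, :] = th.rand(obs_dim, device=device)
    all_actions[i, :] = th.rand(act_dim, device=device)

# a single device-to-host transfer, as in the diff above
obs_np = all_observations.squeeze().cpu().numpy()
act_np = all_actions.squeeze().cpu().numpy()
assert isinstance(obs_np, np.ndarray) and isinstance(act_np, np.ndarray)
```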
10 changes: 4 additions & 6 deletions assume/reinforcement_learning/algorithms/base_algorithm.py
@@ -11,8 +11,8 @@ class RLAlgorithm:
:type learning_role: Learning Role object
:param learning_rate: learning rate for adam optimizer
:type learning_rate: float
:param episodes_collecting_initial_experience: how many steps of the model to collect transitions for before learning starts
:type episodes_collecting_initial_experience: int
:param episodes_initial_experience: how many steps of the model to collect transitions for before learning starts
:type episodes_initial_experience: int
:param batch_size: Minibatch size for each gradient update
:type batch_size: int
:param tau: the soft update coefficient ("Polyak update", between 0 and 1)
@@ -34,7 +34,7 @@ def __init__(
# init learning_role as object of Learning class
learning_role,
learning_rate=1e-4,
episodes_collecting_initial_experience=100,
episodes_initial_experience=100,
batch_size=1024,
tau=0.005,
gamma=0.99,
@@ -47,9 +47,7 @@

self.learning_role = learning_role
self.learning_rate = learning_rate
self.episodes_collecting_initial_experience = (
episodes_collecting_initial_experience
)
self.episodes_initial_experience = episodes_initial_experience
self.batch_size = batch_size
self.gamma = gamma
self.tau = tau
4 changes: 2 additions & 2 deletions assume/reinforcement_learning/algorithms/matd3.py
@@ -26,7 +26,7 @@ def __init__(
self,
learning_role,
learning_rate=1e-4,
episodes_collecting_initial_experience=100,
episodes_initial_experience=100,
batch_size=1024,
tau=0.005,
gamma=0.99,
@@ -38,7 +38,7 @@
super().__init__(
learning_role,
learning_rate,
episodes_collecting_initial_experience,
episodes_initial_experience,
batch_size,
tau,
gamma,
7 changes: 5 additions & 2 deletions assume/reinforcement_learning/buffer.py
@@ -26,6 +26,7 @@ def __init__(
act_dim: int,
n_rl_units: int,
device: str,
float_type,
):
self.buffer_size = buffer_size
self.obs_dim = obs_dim
@@ -35,8 +36,10 @@
self.full = False

self.device = device
self.np_float_type = np.float16 if self.device.type == "cuda" else np.float32
self.th_float_type = th.half if self.device.type == "cuda" else th.float

# future: use float16 for GPU
self.np_float_type = np.float16 if float_type == th.float16 else np.float32
self.th_float_type = float_type

self.n_rl_units = n_rl_units

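The dtype pairing introduced above, condensed into a standalone check; float16 storage would only be used if a caller passed `th.float16`, which this PR does not do yet:

```python
import numpy as np
import torch as th

def pick_dtypes(float_type):
    # mirrors the buffer logic from this PR
    np_float_type = np.float16 if float_type == th.float16 else np.float32
    return np_float_type, float_type

print(pick_dtypes(th.float))    # (<class 'numpy.float32'>, torch.float32)
print(pick_dtypes(th.float16))  # (<class 'numpy.float16'>, torch.float16)
```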
23 changes: 14 additions & 9 deletions assume/reinforcement_learning/learning_role.py
@@ -61,12 +61,17 @@ def __init__(
else "cpu"
)
self.device = th.device(cuda_device if th.cuda.is_available() else "cpu")
self.float_type = th.float16 if "cuda" in cuda_device else th.float

# future: add option to choose between float16 and float32
# float_type = learning_config.get("float_type", "float32")
self.float_type = th.float

th.backends.cuda.matmul.allow_tf32 = True
th.backends.cudnn.allow_tf32 = True

self.learning_rate = learning_config.get("learning_rate", 1e-4)
self.episodes_collecting_initial_experience = learning_config.get(
"episodes_collecting_initial_experience", 5
self.episodes_initial_experience = learning_config.get(
"episodes_initial_experience", 5
)
self.train_freq = learning_config.get("train_freq", 1)
self.gradient_steps = (
@@ -166,7 +171,7 @@ def create_learning_algorithm(self, algorithm: RLAlgorithm):
self.rl_algorithm = TD3(
learning_role=self,
learning_rate=self.learning_rate,
episodes_collecting_initial_experience=self.episodes_collecting_initial_experience,
episodes_initial_experience=self.episodes_initial_experience,
gradient_steps=self.gradient_steps,
batch_size=self.batch_size,
gamma=self.gamma,
@@ -185,7 +190,7 @@ async def update_policy(self) -> None:
Note:
This method is typically scheduled to run periodically during training to continuously improve the agent's policy.
"""
if self.episodes_done > self.episodes_collecting_initial_experience:
if self.episodes_done > self.episodes_initial_experience:
self.rl_algorithm.update_policy()

# TODO: add evaluation function
@@ -437,12 +442,12 @@ def create_actors(self) -> None:
The created actor networks are associated with each unit strategy and stored as attributes.
"""
for _, unit_strategy in self.rl_strats.items():
unit_strategy.actor = Actor(self.obs_dim, self.act_dim, self.float_type).to(
self.device
)
unit_strategy.actor = Actor(
obs_dim=self.obs_dim, act_dim=self.act_dim, float_type=self.float_type
).to(self.device)

unit_strategy.actor_target = Actor(
self.obs_dim, self.act_dim, self.float_type
obs_dim=self.obs_dim, act_dim=self.act_dim, float_type=self.float_type
).to(self.device)
unit_strategy.actor_target.load_state_dict(unit_strategy.actor.state_dict())
unit_strategy.actor_target.train(mode=False)
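The device and precision setup after this PR, condensed into a sketch; `cuda_device` would normally come from the learning config, and the value used here is an assumption:

```python
import torch as th

cuda_device = "cuda:0"  # assumed value; normally read from the learning config

device = th.device(cuda_device if th.cuda.is_available() else "cpu")
# the PR keeps float32 everywhere for now; float16 is left as a future option
float_type = th.float

# TF32 speeds up float32 matmuls/convolutions on Ampere and newer GPUs without
# touching the training code; both flags are taken from the diff above.
th.backends.cuda.matmul.allow_tf32 = True
th.backends.cudnn.allow_tf32 = True
```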