Fix learning on cuda #201

Merged: 10 commits, Sep 25, 2023

Changes from all commits
5 changes: 3 additions & 2 deletions .gitignore
@@ -133,8 +133,9 @@ dmypy.json
.idea
.vscode
examples/inputs/learned_strategies
workshop/

examples/outputs
examples/local_db/
validation_runs
assume-db
forecasts_df.csv
forecasts_df.csv
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -12,4 +12,4 @@ build:
python: mambaforge-4.10

conda:
environment: environment_docs.yml
environment: environment_docs.yaml
4 changes: 2 additions & 2 deletions README.md
@@ -1,6 +1,6 @@
# ASSUME: Agent-Based Electricity Markets Simulation Toolbox

![Lint Status](https://github.com/assume-framework/assume/actions/workflows/lint-pytest.yml/badge.svg)
![Lint Status](https://github.com/assume-framework/assume/actions/workflows/lint-pytest.yaml/badge.svg)
[![Code Coverage](https://codecov.io/gh/assume-framework/assume/branch/main/graph/badge.svg?token=CZ4FO7P57H)](https://codecov.io/gh/assume-framework/assume)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8088760.svg)](https://doi.org/10.5281/zenodo.8088760)

@@ -114,7 +114,7 @@ pre-commit run --all-files
First, create an environment that includes the documentation dependencies:

```bash
conda env create -f environment_docs.yml
conda env create -f environment_docs.yaml
```

To generate or update the automatically created docs in `docs/source/assume*`, run:
6 changes: 4 additions & 2 deletions assume/common/outputs.py
@@ -41,7 +41,7 @@ def __init__(
end: datetime,
db_engine=None,
export_csv_path: str = "",
save_frequency_hours: int = 24,
save_frequency_hours: int = None,
learning_mode: bool = False,
):
super().__init__()
@@ -202,6 +202,7 @@ async def store_dfs(self):
for table in self.write_dfs.keys():
if len(self.write_dfs[table]) == 0:
continue

df = pd.concat(self.write_dfs[table], axis=0)
df.reset_index()
if df.empty:
@@ -318,7 +319,8 @@ def write_market_dispatch(self, data):
"""
df = pd.DataFrame(data, columns=["datetime", "power", "market_id", "unit_id"])
df["simulation"] = self.simulation_id
self.write_dfs["market_dispatch"].append(df)
if not df.empty:
self.write_dfs["market_dispatch"].append(df)

def write_unit_dispatch(self, data):
"""
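The two hunks above add empty-frame guards at both ends of the output pipeline: `store_dfs` skips tables with nothing queued, and `write_market_dispatch` no longer appends empty DataFrames to the queue. A minimal, self-contained sketch of that pattern; the helper names `queue_market_dispatch` and `flush` are illustrative and not part of the codebase:

```python
from collections import defaultdict

import pandas as pd

write_dfs = defaultdict(list)  # table name -> list of queued DataFrames, as in WriteOutput


def queue_market_dispatch(data, simulation_id):
    """Queue dispatch rows for a later bulk write, skipping empty batches."""
    df = pd.DataFrame(data, columns=["datetime", "power", "market_id", "unit_id"])
    df["simulation"] = simulation_id
    if not df.empty:  # the guard added in this PR: never queue empty frames
        write_dfs["market_dispatch"].append(df)


def flush():
    """Concatenate queued frames per table; tables with nothing queued are skipped."""
    for table, frames in write_dfs.items():
        if not frames:
            continue
        df = pd.concat(frames, axis=0)
        if df.empty:
            continue
        # the real role would write df to the database or a CSV export here
        print(f"writing {len(df)} rows to {table}")


queue_market_dispatch([], "example_run")  # nothing is queued
queue_market_dispatch([["2023-09-25 00:00", 100.0, "EOM", "unit_1"]], "example_run")
flush()  # prints: writing 1 rows to market_dispatch
```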
9 changes: 6 additions & 3 deletions assume/common/scenario_loader.py
@@ -248,7 +248,7 @@ async def load_scenario_folder_async(

# load the config file
path = f"{inputs_path}/{scenario}"
with open(f"{path}/config.yml", "r") as f:
with open(f"{path}/config.yaml", "r") as f:
config = yaml.safe_load(f)
if not study_case:
study_case = list(config.keys())[0]
@@ -559,6 +559,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
act_dim=world.learning_role.act_dim,
n_rl_units=len(world.learning_role.rl_strats),
device=world.learning_role.device,
float_type=world.learning_role.float_type,
)
actors_and_critics = None
world.output_role.del_similar_runs()
@@ -584,7 +585,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
world.learning_role.buffer = buffer
world.learning_role.episodes_done = episode

if episode + 1 >= world.learning_role.episodes_collecting_initial_experience:
if episode + 1 > world.learning_role.episodes_collecting_initial_experience:
world.learning_role.turn_off_initial_exploration()

world.run()
@@ -593,7 +594,9 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
world.learning_role.training_episodes,
world.learning_config.get("validation_episodes_interval", 5),
)
if (episode + 1) % validation_interval == 0:
if (episode + 1) % validation_interval == 0 and (
episode + 1
) > world.learning_role.episodes_collecting_initial_experience:
old_path = world.learning_config["load_learned_path"]
new_path = f"{old_path}_eval"
# save validation params in validation path
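The hunks above tighten the episode schedule in `run_learning`: initial exploration is switched off only once strictly more than `episodes_collecting_initial_experience` episodes have been started, and validation runs are additionally skipped while that initial experience is still being collected. The generator below is an illustrative reconstruction of those two conditions, not the loader's actual control flow:

```python
def schedule(
    training_episodes: int,
    episodes_collecting_initial_experience: int,
    validation_episodes_interval: int = 5,
):
    """Yield (episode, explore, validate) flags following the corrected conditions."""
    validation_interval = min(training_episodes, validation_episodes_interval)
    for episode in range(training_episodes):
        # exploration stays on until the configured number of exploration episodes is done
        explore = not (episode + 1 > episodes_collecting_initial_experience)
        # validate on the interval, but never during the initial-experience phase
        validate = (episode + 1) % validation_interval == 0 and (
            episode + 1
        ) > episodes_collecting_initial_experience
        yield episode, explore, validate


# e.g. 6 training episodes, 2 exploration episodes, validation every 2 episodes
for episode, explore, validate in schedule(6, 2, 2):
    print(episode, explore, validate)
```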
38 changes: 24 additions & 14 deletions assume/common/units_operator.py
@@ -411,7 +411,9 @@ def write_to_learning(
self,
start: datetime,
marketconfig: MarketConfig,
action_dimension: int,
obs_dim: int,
act_dim: int,
device: str,
learning_unit_count: int,
):
learning_role_id = "learning_agent"
@@ -422,27 +424,28 @@
try:
import torch as th

all_actions = th.zeros(
(learning_unit_count, action_dimension), device="cpu"
)
except ImportError:
logger.error("tried writing learning_params, but torch is not installed")
all_actions = np.zeros((learning_unit_count, action_dimension))
all_actions = np.zeros((learning_unit_count, act_dim))
return

all_observations = th.zeros((learning_unit_count, obs_dim), device=device)
all_actions = th.zeros((learning_unit_count, act_dim), device=device)

i = 0
for unit_id, unit in self.units.items():
for unit in self.units.values():
# rl only for energy market for now!
if isinstance(
unit.bidding_strategies.get(marketconfig.product_type),
LearningStrategy,
):
all_observations.append(
np.array(unit.outputs["rl_observations"][start])
)
all_observations[i, :] = unit.outputs["rl_observations"][start]
all_actions[i, :] = unit.outputs["rl_actions"][start]
all_rewards.append(unit.outputs["reward"][start])
i += 1

# convert all_actions list of tensor to numpy 2D array
all_observations = all_observations.squeeze().cpu().numpy()
all_actions = all_actions.squeeze().cpu().numpy()
all_rewards = np.array(all_rewards)
rl_agent_data = (np.array(all_observations), all_actions, all_rewards)
@@ -467,16 +470,18 @@ def write_learning_params(self, orderbook: Orderbook, marketconfig: MarketConfig
:type marketconfig: MarketConfig
"""
learning_strategies = []
action_dimension = 0

for unit in self.units.values():
bidding_strategy = unit.bidding_strategies.get(marketconfig.product_type)
if isinstance(bidding_strategy, LearningStrategy):
learning_strategies.append(bidding_strategy)
# should be the same across all strategies
action_dimension = bidding_strategy.act_dim
obs_dim = bidding_strategy.obs_dim
act_dim = bidding_strategy.act_dim
device = bidding_strategy.device

# should write learning results if at least one bidding_strategy is a learning strategy
write_learning_results = len(learning_strategies) > 0 and orderbook
if write_learning_results:
if learning_strategies and orderbook:
start = orderbook[0]["start_time"]
# write learning output
self.write_learning_to_output(start, marketconfig)
@@ -486,5 +491,10 @@ def write_learning_params(self, orderbook: Orderbook, marketconfig: MarketConfig
if learning_strategies[0].learning_mode:
# in learning mode we are sending data to learning
self.write_to_learning(
start, marketconfig, action_dimension, len(learning_strategies)
start=start,
marketconfig=marketconfig,
obs_dim=obs_dim,
act_dim=act_dim,
device=device,
learning_unit_count=len(learning_strategies),
)
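The rewritten `write_to_learning` above pre-allocates observation and action tensors of shape `(learning_unit_count, dim)` on the learning device and copies them to NumPy once at the end, rather than appending per-unit arrays on the CPU. A rough standalone sketch of that collection pattern; `collect_rl_data` and the sample data are invented for illustration:

```python
import numpy as np
import torch as th


def collect_rl_data(unit_outputs, obs_dim, act_dim, device="cpu"):
    """Gather per-unit observations, actions and rewards into pre-allocated tensors."""
    n_units = len(unit_outputs)
    all_obs = th.zeros((n_units, obs_dim), device=device)
    all_act = th.zeros((n_units, act_dim), device=device)
    all_rewards = []
    for i, out in enumerate(unit_outputs):
        all_obs[i, :] = th.as_tensor(out["observation"], dtype=all_obs.dtype)
        all_act[i, :] = th.as_tensor(out["action"], dtype=all_act.dtype)
        all_rewards.append(out["reward"])
    # a single device-to-host transfer at the end, as in the diff
    return (
        all_obs.squeeze().cpu().numpy(),
        all_act.squeeze().cpu().numpy(),
        np.array(all_rewards),
    )


# hypothetical data for two learning units with obs_dim=50 and act_dim=2
outputs = [
    {"observation": np.ones(50), "action": np.zeros(2), "reward": 0.1},
    {"observation": np.zeros(50), "action": np.ones(2), "reward": 0.3},
]
obs, act, rew = collect_rl_data(outputs, obs_dim=50, act_dim=2)
print(obs.shape, act.shape, rew.shape)  # (2, 50) (2, 2) (2,)
```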
2 changes: 1 addition & 1 deletion assume/reinforcement_learning/algorithms/base_algorithm.py
@@ -69,6 +69,6 @@ def __init__(
self.unique_obs_len = 8

def update_policy(self):
self.logger.error(
logger.error(
"No policy update function of the used Rl algorithm was defined. Please define how the policies should be updated in the specific algorithm you use"
)
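The one-line change above switches from an instance attribute to the module-level `logger`, the usual `logging.getLogger(__name__)` pattern. A minimal sketch of that pattern, with names chosen for illustration:

```python
import logging

logger = logging.getLogger(__name__)  # module-level logger; no self.logger attribute needed


class RLAlgorithmSketch:
    def update_policy(self):
        # subclasses are expected to override this; the base class only reports misuse
        logger.error(
            "No policy update function of the used RL algorithm was defined. "
            "Please define how the policies should be updated in the specific algorithm you use."
        )
```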
7 changes: 5 additions & 2 deletions assume/reinforcement_learning/buffer.py
@@ -26,6 +26,7 @@ def __init__(
act_dim: int,
n_rl_units: int,
device: str,
float_type,
):
self.buffer_size = buffer_size
self.obs_dim = obs_dim
@@ -35,8 +36,10 @@ def __init__(
self.full = False

self.device = device
self.np_float_type = np.float16 if self.device.type == "cuda" else np.float32
self.th_float_type = th.half if self.device.type == "cuda" else th.float

# future: use float16 for GPU
self.np_float_type = np.float16 if float_type == th.float16 else np.float32
self.th_float_type = float_type

self.n_rl_units = n_rl_units

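With this hunk the buffer's NumPy and torch dtypes follow the `float_type` passed in from the learning role instead of being inferred from the device (previously a CUDA device silently switched the buffer to float16). A reduced sketch of the constructor logic; the class name and the storage arrays shown are illustrative, and the real `Buffer` holds more state:

```python
import numpy as np
import torch as th


class ReplayBufferSketch:
    """Illustrative buffer skeleton mirroring the dtype handling from the diff."""

    def __init__(self, buffer_size, obs_dim, act_dim, n_rl_units, device, float_type):
        self.device = device
        # dtype is driven by the explicit float_type argument, not by device.type
        self.np_float_type = np.float16 if float_type == th.float16 else np.float32
        self.th_float_type = float_type
        self.observations = np.zeros(
            (buffer_size, n_rl_units, obs_dim), dtype=self.np_float_type
        )
        self.actions = np.zeros(
            (buffer_size, n_rl_units, act_dim), dtype=self.np_float_type
        )


# run_learning constructs the buffer with the learning role's settings, roughly like:
buffer = ReplayBufferSketch(
    buffer_size=1000,
    obs_dim=50,
    act_dim=2,
    n_rl_units=3,
    device=th.device("cpu"),
    float_type=th.float,
)
print(buffer.observations.dtype)  # float32 unless float16 is requested explicitly
```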
15 changes: 10 additions & 5 deletions assume/reinforcement_learning/learning_role.py
@@ -61,8 +61,13 @@ def __init__(
else "cpu"
)
self.device = th.device(cuda_device if th.cuda.is_available() else "cpu")
self.float_type = th.float16 if "cuda" in cuda_device else th.float

# future: add option to choose between float16 and float32
# float_type = learning_config.get("float_type", "float32")
self.float_type = th.float

th.backends.cuda.matmul.allow_tf32 = True
th.backends.cudnn.allow_tf32 = True

self.learning_rate = learning_config.get("learning_rate", 1e-4)
self.episodes_collecting_initial_experience = learning_config.get(
@@ -437,12 +442,12 @@ def create_actors(self) -> None:
The created actor networks are associated with each unit strategy and stored as attributes.
"""
for _, unit_strategy in self.rl_strats.items():
unit_strategy.actor = Actor(self.obs_dim, self.act_dim, self.float_type).to(
self.device
)
unit_strategy.actor = Actor(
obs_dim=self.obs_dim, act_dim=self.act_dim, float_type=self.float_type
).to(self.device)

unit_strategy.actor_target = Actor(
self.obs_dim, self.act_dim, self.float_type
obs_dim=self.obs_dim, act_dim=self.act_dim, float_type=self.float_type
).to(self.device)
unit_strategy.actor_target.load_state_dict(unit_strategy.actor.state_dict())
unit_strategy.actor_target.train(mode=False)
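After this change the learning role keeps all tensors in float32 (a configurable `float_type` is only hinted at as a future option) and enables TF32 kernels instead of relying on float16 on CUDA. A condensed sketch of that device and precision setup; `setup_device_and_dtype` and the config handling are illustrative simplifications of the constructor:

```python
import torch as th


def setup_device_and_dtype(learning_config: dict):
    """Pick the torch device and float type roughly the way the updated LearningRole does."""
    requested = learning_config.get("device", "cpu")
    use_cuda = "cuda" in requested and th.cuda.is_available()
    device = th.device(requested if use_cuda else "cpu")

    # learning now always runs in float32; float16 remains a possible future option
    float_type = th.float

    # allow TF32 matmul/cuDNN kernels, which are fast on recent GPUs without float16 issues
    th.backends.cuda.matmul.allow_tf32 = True
    th.backends.cudnn.allow_tf32 = True
    return device, float_type


device, float_type = setup_device_and_dtype({"device": "cuda:0"})
print(device, float_type)  # falls back to cpu when no GPU is available
```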