-fixed evaluation average reward calculation
-fixed saving learned strategies
-fixed profit and regret calculations
-fixed missing unit_id in rejected orders
-fixed issue when mcp is tensor
-fixed some dashboards
-fixed use of variable cost while inputs use fixed cost
-added check if learning is on but no RL strategies are given
-query for average reward in eval raises an error if not available
nick-harder committed Sep 27, 2023
1 parent 9dcd4d5 commit 6449ac3
Showing 17 changed files with 256 additions and 134 deletions.
6 changes: 3 additions & 3 deletions assume/common/base.py
@@ -783,8 +783,8 @@ class LearningConfig(TypedDict):
:type noise_scale: int
:param noise_dt: Determines how quickly the noise weakens over time.
:type noise_dt: int
:param load_learned_path: The path to the learned model to load.
:type load_learned_path: str
:param trained_actors_path: The path to the learned model to load.
:type trained_actors_path: str
"""

observation_dimension: int
@@ -805,4 +805,4 @@ class LearningConfig(TypedDict):
noise_sigma: float
noise_scale: int
noise_dt: int
load_learned_path: str
trained_actors_path: str
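
For orientation, a minimal sketch of how the renamed key would appear in a learning config dict; only a subset of the real LearningConfig fields is shown, and the path is a hypothetical placeholder:

```python
from typing import TypedDict

# Reduced stand-in for LearningConfig; the actual class defines many more fields
class LearningConfig(TypedDict, total=False):
    learning_mode: bool
    trained_actors_path: str

# hypothetical values, only to show where the renamed key is consumed
config: LearningConfig = {
    "learning_mode": True,
    "trained_actors_path": "inputs/example/learned_strategies/example_sim",
}
```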
4 changes: 2 additions & 2 deletions assume/common/forecasts.py
@@ -226,9 +226,9 @@ def calculate_marginal_cost(self, pp_series: pd.Series):

fuel_cost = fuel_price / pp_series["efficiency"]
emissions_cost = co2_price * emission_factor / pp_series["efficiency"]
variable_cost = pp_series["var_cost"] if "var_cost" in pp_series else 0.0
fixed_cost = pp_series["fixed_cost"] if "fixed_cost" in pp_series else 0.0

marginal_cost = fuel_cost + emissions_cost + variable_cost
marginal_cost = fuel_cost + emissions_cost + fixed_cost

return marginal_cost

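For reference, the corrected calculation reduces to marginal_cost = fuel_price / efficiency + co2_price * emission_factor / efficiency + fixed_cost. A self-contained sketch with made-up numbers (not taken from any input file):

```python
import pandas as pd

# hypothetical power plant row; the real series comes from the plant input files
pp_series = pd.Series({"efficiency": 0.4, "emission_factor": 0.9, "fixed_cost": 2.0})
fuel_price = 30.0  # EUR/MWh_th
co2_price = 80.0   # EUR/t

fuel_cost = fuel_price / pp_series["efficiency"]
emissions_cost = co2_price * pp_series["emission_factor"] / pp_series["efficiency"]
fixed_cost = pp_series["fixed_cost"] if "fixed_cost" in pp_series else 0.0

# fixed_cost now enters the sum, matching the column actually present in the inputs
marginal_cost = fuel_cost + emissions_cost + fixed_cost
print(marginal_cost)  # 75.0 + 180.0 + 2.0 = 257.0
```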
16 changes: 10 additions & 6 deletions assume/common/outputs.py
@@ -290,12 +290,16 @@ def write_market_orders(self, market_orders, market_id):
# check if the market orders list is empty and skip the function
if not market_orders:
return

market_orders = separate_orders(market_orders)
df = pd.DataFrame.from_records(market_orders, index="start_time")

del df["only_hours"]
del df["agent_id"]

df["simulation"] = self.simulation_id
df["market_id"] = market_id

self.write_dfs["market_orders"].append(df)

def write_units_definition(self, unit_info: dict):
@@ -380,6 +384,8 @@ async def on_stop(self):

dfs.append(df)

# remove all empty dataframes
dfs = [df for df in dfs if not df.empty]
if not dfs:
return

@@ -402,13 +408,11 @@

def get_sum_reward(self):
query = text(
"select value from kpis where variable = 'sum_reward' and ident = '{self.simulation_id}'"
f"select reward FROM rl_params where simulation='{self.simulation_id}'"
)

try:
with self.db.begin() as db:
avg_reward = db.execute(query).fetchall()[0]
except Exception:
avg_reward = 0
with self.db.begin() as db:
reward = db.execute(query).fetchall()
avg_reward = sum(r[0] for r in reward) / len(reward)

return avg_reward
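
A self-contained sketch of the reworked aggregation, using an in-memory SQLite database as a stand-in for the simulation output store (table contents are hypothetical). Without the old try/except, an empty result now raises instead of silently reporting 0:

```python
from sqlalchemy import create_engine, text

engine = create_engine("sqlite:///:memory:")  # stand-in for the real output database
simulation_id = "example_sim"

# populate a minimal rl_params table so the query below has something to read
with engine.begin() as db:
    db.execute(text("create table rl_params (simulation text, reward real)"))
    db.execute(
        text(f"insert into rl_params values ('{simulation_id}', 0.4), ('{simulation_id}', 0.6)")
    )

query = text(f"select reward FROM rl_params where simulation='{simulation_id}'")
with engine.begin() as db:
    rewards = db.execute(query).fetchall()

# an empty result raises ZeroDivisionError, i.e. the error the commit message refers to
avg_reward = sum(r[0] for r in rewards) / len(rewards)
print(avg_reward)  # 0.5
```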
50 changes: 32 additions & 18 deletions assume/common/scenario_loader.py
@@ -234,7 +234,7 @@ async def load_scenario_folder_async(
perform_evaluation: bool = False,
episode: int = 0,
eval_episode: int = 0,
load_learned_path: str = "",
trained_actors_path: str = "",
):
"""Load a scenario from a given path. Raises: ValueError: If the scenario or study case is not found.
@@ -304,12 +304,12 @@ async def load_scenario_folder_async(
)
learning_config["evaluation_mode"] = perform_evaluation

if "load_learned_path" not in learning_config.keys():
if load_learned_path:
learning_config["load_learned_path"] = load_learned_path
if "trained_actors_path" not in learning_config.keys():
if trained_actors_path:
learning_config["trained_actors_path"] = trained_actors_path
else:
learning_config[
"load_learned_path"
"trained_actors_path"
] = f"{inputs_path}/learned_strategies/{sim_id}"

if learning_config.get("learning_mode", False):
@@ -451,6 +451,13 @@ async def load_scenario_folder_async(
forecaster=forecaster,
)

if (
world.learning_mode
and world.learning_role is not None
and len(world.learning_role.rl_strats) == 0
):
raise ValueError("No RL units/strategies were provided!")

Codecov / codecov/patch warning on assume/common/scenario_loader.py#L459: added line #L459 was not covered by tests.

async def async_load_custom_units(
world: World,
@@ -523,9 +530,9 @@ def load_scenario_folder(
study_case: str,
perform_learning: bool = True,
perform_evaluation: bool = False,
episode: int = 0,
eval_episode: int = 0,
load_learned_path="",
episode: int = 1,
eval_episode: int = 1,
trained_actors_path="",
):
"""
Load a scenario from a given path.
@@ -549,7 +556,7 @@
perform_evaluation=perform_evaluation,
episode=episode,
eval_episode=eval_episode,
load_learned_path=load_learned_path,
trained_actors_path=trained_actors_path,
)
)

@@ -574,7 +581,13 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
actors_and_critics = None
world.output_role.del_similar_runs()

validation_interval = min(
world.learning_role.training_episodes,
world.learning_config.get("validation_episodes_interval", 5),
)

eval_episode = 1

for episode in tqdm(
range(1, world.learning_role.training_episodes + 1),
desc="Training Episodes",
@@ -601,17 +614,16 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
world.learning_role.turn_off_initial_exploration()

world.run()

actors_and_critics = world.learning_role.extract_actors_and_critics()
validation_interval = min(
world.learning_role.training_episodes,
world.learning_config.get("validation_episodes_interval", 5),
)

if (
episode % validation_interval == 0
and episode > world.learning_role.episodes_collecting_initial_experience
):
old_path = world.learning_config["load_learned_path"]
old_path = world.learning_config["trained_actors_path"]
new_path = f"{old_path}_eval"

# save validation params in validation path
world.learning_role.save_params(directory=new_path)
world.reset()
Expand All @@ -625,14 +637,16 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
perform_learning=False,
perform_evaluation=True,
eval_episode=eval_episode,
load_learned_path=new_path,
trained_actors_path=new_path,
)

world.run()

avg_reward = world.output_role.get_sum_reward()
# check reward improvement in validation run
world.learning_config["load_learned_path"] = old_path
if best_reward < avg_reward:
# save new best params for simulation
world.learning_config["trained_actors_path"] = old_path
if avg_reward > best_reward:
# update best reward
best_reward = avg_reward
world.learning_role.save_params(directory=old_path)

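The moved validation_interval and the new eval_episode counter implement a simple cadence: every few training episodes (after initial experience collection), an evaluation run is performed and the actors are only kept if the average reward improves. A reduced sketch of that control flow, with made-up numbers rather than defaults from any scenario:

```python
# all numbers below are illustrative
training_episodes = 10
episodes_collecting_initial_experience = 2
validation_interval = min(training_episodes, 5)  # capped as in the diff

# stand-in for world.output_role.get_sum_reward() of each evaluation run
fake_eval_rewards = {5: 0.20, 10: 0.35}
best_reward = float("-inf")
eval_episode = 1

for episode in range(1, training_episodes + 1):
    # ... one training episode would run here ...
    if (
        episode % validation_interval == 0
        and episode > episodes_collecting_initial_experience
    ):
        avg_reward = fake_eval_rewards[episode]
        if avg_reward > best_reward:
            best_reward = avg_reward  # keep the actors that produced this run
        eval_episode += 1

print(best_reward)  # 0.35
```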
6 changes: 5 additions & 1 deletion assume/common/units_operator.py
@@ -164,11 +164,15 @@ def handle_market_feedback(self, content: ClearingMessage, meta: dict[str, str])
:type meta: dict[str, str]
"""
logger.debug(f"{self.id} got market result: {content}")
orderbook: Orderbook = content["orderbook"]
accepted_orders: Orderbook = content["accepted_orders"]
rejected_orders: Orderbook = content["rejected_orders"]
orderbook = accepted_orders + rejected_orders

for order in orderbook:
order["market_id"] = content["market_id"]
# map bid id to unit id
order["unit_id"] = self.bids_map[order["bid_id"]]

self.valid_orders.extend(orderbook)
marketconfig = self.registered_markets[content["market_id"]]
self.set_unit_dispatch(orderbook, marketconfig)
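A minimal sketch of the new feedback handling: accepted and rejected orders are concatenated, and every order, including rejected ones, gets its unit_id back via bids_map (all identifiers and volumes below are hypothetical):

```python
# mapping from bid id to unit id, as maintained by the units operator
bids_map = {"bid_1": "pp_1", "bid_2": "pp_2"}

# hypothetical clearing message content
content = {
    "market_id": "EOM",
    "accepted_orders": [{"bid_id": "bid_1", "accepted_volume": 100.0}],
    "rejected_orders": [{"bid_id": "bid_2", "accepted_volume": 0.0}],
}

orderbook = content["accepted_orders"] + content["rejected_orders"]
for order in orderbook:
    order["market_id"] = content["market_id"]
    order["unit_id"] = bids_map[order["bid_id"]]  # previously missing for rejected orders

print([o["unit_id"] for o in orderbook])  # ['pp_1', 'pp_2']
```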
14 changes: 7 additions & 7 deletions assume/markets/base_market.py
@@ -344,34 +344,34 @@ async def clear_market(self, market_products: list[MarketProduct]):
(
accepted_orderbook,
rejected_orderbook,
# pending_orderbook,
market_meta,
) = self.clear(self.all_orders, market_products)
self.all_orders = []
for order in rejected_orderbook:
order["accepted_volume"] = 0
order["accepted_price"] = 0
order["accepted_price"] = market_meta[0]["price"]
self.open_auctions - set(market_products)
# self.all_orders = pending_orderbook

accepted_orderbook.sort(key=itemgetter("agent_id"))
rejected_orderbook.sort(key=itemgetter("agent_id"))
accepted_bids = {

accepted_orders = {
agent: list(bids)
for agent, bids in groupby(accepted_orderbook, itemgetter("agent_id"))
}
rejected_bids = {
rejected_orders = {
agent: list(bids)
for agent, bids in groupby(rejected_orderbook, itemgetter("agent_id"))
}

for agent in self.registered_agents:
addr, aid = agent
meta = {"sender_addr": self.context.addr, "sender_id": self.context.aid}
closing: ClearingMessage = {
"context": "clearing",
"market_id": self.marketconfig.name,
"orderbook": accepted_bids.get(agent, []),
"rejected": rejected_bids.get(agent, []),
"accepted_orders": accepted_orders.get(agent, []),
"rejected_orders": rejected_orders.get(agent, []),
}
await self.context.send_acl_message(
closing,
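The per-agent grouping relies on itertools.groupby, which only merges consecutive items, hence the sort by agent_id right before it. A small standalone illustration with hypothetical orders:

```python
from itertools import groupby
from operator import itemgetter

# hypothetical rejected orders from two agents, deliberately out of order
rejected_orderbook = [
    {"agent_id": ("addr", "agent_2"), "bid_id": "b3"},
    {"agent_id": ("addr", "agent_1"), "bid_id": "b1"},
    {"agent_id": ("addr", "agent_1"), "bid_id": "b2"},
]

# groupby only groups adjacent entries, so sorting by agent_id is required first
rejected_orderbook.sort(key=itemgetter("agent_id"))
rejected_orders = {
    agent: list(bids)
    for agent, bids in groupby(rejected_orderbook, itemgetter("agent_id"))
}

print({agent: len(bids) for agent, bids in rejected_orders.items()})
# {('addr', 'agent_1'): 2, ('addr', 'agent_2'): 1}
```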
11 changes: 5 additions & 6 deletions assume/markets/clearing_algorithms/simple.py
@@ -112,20 +112,17 @@ def clear(
split_demand_order["accepted_volume"] = diff
demand_order["accepted_volume"] = demand_order["volume"] - diff
rejected_orders.append(split_demand_order)

elif diff > 0:
# generation left over - split last generation bid
supply_order = to_commit[-1]
split_supply_order = supply_order.copy()
split_supply_order["volume"] = diff
supply_order["accepted_volume"] = supply_order["volume"] - diff

# changed supply_order is still part of to_commit and will be added
# only volume-diff can be sold for current price
gen_vol -= diff

# add left over to supply_orders again
supply_orders.insert(0, split_supply_order)
else:
# diff == 0 perfect match
demand_order["accepted_volume"] = demand_order["volume"]

accepted_product_orders.append(demand_order)
Expand All @@ -139,7 +136,9 @@ def clear(
x for x in accepted_product_orders if x["accepted_volume"] > 0
]
if accepted_supply_orders:
clear_price = max(map(itemgetter("price"), accepted_supply_orders))
clear_price = float(
max(map(itemgetter("price"), accepted_supply_orders))
)
else:
clear_price = 0

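The explicit float(...) around the clearing price ties in with the "-fixed issue when mcp is tensor" note: bid prices produced by learning strategies may arrive as numpy/tensor scalars, and max() would otherwise propagate that type into market_meta and the output writer. A small illustration using numpy scalars as a stand-in:

```python
import numpy as np
from operator import itemgetter

# hypothetical accepted supply orders whose prices are numpy scalars
accepted_supply_orders = [
    {"price": np.float32(38.0)},
    {"price": np.float32(42.5)},
]

clear_price = float(max(map(itemgetter("price"), accepted_supply_orders)))
print(type(clear_price), clear_price)  # <class 'float'> 42.5
```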
16 changes: 8 additions & 8 deletions assume/strategies/learning_strategies.py
@@ -69,8 +69,8 @@ def __init__(self, *args, **kwargs):
dt=kwargs.get("noise_dt", 1.0),
)

elif Path(load_path=kwargs["load_learned_path"]).is_dir():
self.load_actor_params(load_path=kwargs["load_learned_path"])
elif Path(load_path=kwargs["trained_actors_path"]).is_dir():
self.load_actor_params(load_path=kwargs["trained_actors_path"])

def calculate_bids(
self,
@@ -364,14 +364,14 @@ def calculate_reward(
timestep=start,
)

# calculate profit as income - running_cost from this event
duration = (end - start) / timedelta(hours=1)
order_profit = order["price"] * order["volume"] * duration

# calculate profit as income - running_cost from this event
price_difference = order["accepted_price"] - marginal_cost
order_profit = price_difference * order["accepted_volume"] * duration

# calculate opportunity cost
# as the loss of income we have because we are not running at full power
price_difference = order["price"] - marginal_cost

order_opportunity_cost = (
price_difference
* (
@@ -411,9 +411,9 @@ def calculate_reward(
reward = float(profit - regret_scale * opportunity_cost) * scaling

# store results in unit outputs which are written to database by unit operator
unit.outputs["profit"].loc[start:end_excl] += float(profit)
unit.outputs["profit"].loc[start:end_excl] += profit
unit.outputs["reward"].loc[start:end_excl] = reward
unit.outputs["regret"].loc[start:end_excl] = float(opportunity_cost)
unit.outputs["regret"].loc[start:end_excl] = opportunity_cost

def load_actor_params(self, load_path):
"""
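A worked example of the corrected profit term, which uses the accepted price and accepted volume rather than the submitted bid. All numbers, including regret_scale and scaling, are made up for illustration, and the opportunity-cost term is kept in the same simplified form:

```python
from datetime import datetime, timedelta

start = datetime(2023, 9, 27, 12)
end = datetime(2023, 9, 27, 13)
duration = (end - start) / timedelta(hours=1)  # 1.0 hour

marginal_cost = 40.0       # EUR/MWh
accepted_price = 55.0      # EUR/MWh, what the order actually cleared at
accepted_volume = 80.0     # MW
max_power = 100.0          # MW

# profit now uses the accepted price/volume instead of the submitted bid
price_difference = accepted_price - marginal_cost
order_profit = price_difference * accepted_volume * duration

# opportunity cost: income lost by not running at full power
order_opportunity_cost = price_difference * (max_power - accepted_volume) * duration

regret_scale, scaling = 0.2, 1e-4  # illustrative only
reward = float(order_profit - regret_scale * order_opportunity_cost) * scaling
print(order_profit, order_opportunity_cost, reward)  # 1200.0 300.0 ~0.114
```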
12 changes: 2 additions & 10 deletions assume/units/powerplant.py
@@ -77,7 +77,6 @@ def __init__(
min_power: float = 0.0,
efficiency: float = 1.0,
fixed_cost: float = 0.0,
variable_cost: float | pd.Series = 0.0,
partial_load_eff: bool = False,
fuel_type: str = "others",
emission_factor: float = 0.0,
@@ -126,7 +125,6 @@ def __init__(
)

self.fixed_cost = fixed_cost
self.variable_cost = variable_cost
self.hot_start_cost = hot_start_cost * max_power
self.warm_start_cost = warm_start_cost * max_power
self.cold_start_cost = cold_start_cost * max_power
@@ -197,7 +195,7 @@ def calc_simple_marginal_cost(
marginal_cost = (
fuel_price / self.efficiency
+ self.forecaster.get_price("co2") * self.emission_factor / self.efficiency
+ self.variable_cost
+ self.fixed_cost
)

return marginal_cost
@@ -256,16 +254,10 @@ def calc_marginal_cost_with_partial_eff(
efficiency = self.efficiency - eta_loss
co2_price = self.forecaster.get_price("co2").at[timestep]

variable_cost = (
self.variable_cost
if isinstance(self.variable_cost, float)
else self.variable_cost[timestep]
)

marginal_cost = (
fuel_price / efficiency
+ co2_price * self.emission_factor / efficiency
+ variable_cost
+ self.fixed_cost
)

return marginal_cost