Skip to content

Commit

Permalink
Merge pull request #212 from assume-framework/fix-learning-strategies
Browse files Browse the repository at this point in the history
Adressing some major issues with learning
  • Loading branch information
maurerle authored Sep 28, 2023
2 parents 9dcd4d5 + 5f1b792 commit 1bd92c8
Show file tree
Hide file tree
Showing 19 changed files with 318 additions and 187 deletions.
6 changes: 3 additions & 3 deletions assume/common/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,8 +783,8 @@ class LearningConfig(TypedDict):
:type noise_scale: int
:param noise_dt: Determines how quickly the noise weakens over time.
:type noise_dt: int
:param load_learned_path: The path to the learned model to load.
:type load_learned_path: str
:param trained_actors_path: The path to the learned model to load.
:type trained_actors_path: str
"""

observation_dimension: int
Expand All @@ -805,4 +805,4 @@ class LearningConfig(TypedDict):
noise_sigma: float
noise_scale: int
noise_dt: int
load_learned_path: str
trained_actors_path: str
4 changes: 2 additions & 2 deletions assume/common/forecasts.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,9 @@ def calculate_marginal_cost(self, pp_series: pd.Series):

fuel_cost = fuel_price / pp_series["efficiency"]
emissions_cost = co2_price * emission_factor / pp_series["efficiency"]
variable_cost = pp_series["var_cost"] if "var_cost" in pp_series else 0.0
fixed_cost = pp_series["fixed_cost"] if "fixed_cost" in pp_series else 0.0

marginal_cost = fuel_cost + emissions_cost + variable_cost
marginal_cost = fuel_cost + emissions_cost + fixed_cost

return marginal_cost

Expand Down
16 changes: 10 additions & 6 deletions assume/common/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,16 @@ def write_market_orders(self, market_orders, market_id):
# check if market results list is empty and skip the funktion and raise a warning
if not market_orders:
return

market_orders = separate_orders(market_orders)
df = pd.DataFrame.from_records(market_orders, index="start_time")

del df["only_hours"]
del df["agent_id"]

df["simulation"] = self.simulation_id
df["market_id"] = market_id

self.write_dfs["market_orders"].append(df)

def write_units_definition(self, unit_info: dict):
Expand Down Expand Up @@ -380,6 +384,8 @@ async def on_stop(self):

dfs.append(df)

# remove all empty dataframes
dfs = [df for df in dfs if not df.empty]
if not dfs:
return

Expand All @@ -402,13 +408,11 @@ async def on_stop(self):

def get_sum_reward(self):
query = text(
"select value from kpis where variable = 'sum_reward' and ident = '{self.simulation_id}'"
f"select reward FROM rl_params where simulation='{self.simulation_id}'"
)

try:
with self.db.begin() as db:
avg_reward = db.execute(query).fetchall()[0]
except Exception:
avg_reward = 0
with self.db.begin() as db:
reward = db.execute(query).fetchall()
avg_reward = sum(r[0] for r in reward) / len(reward)

return avg_reward
50 changes: 32 additions & 18 deletions assume/common/scenario_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ async def load_scenario_folder_async(
perform_evaluation: bool = False,
episode: int = 0,
eval_episode: int = 0,
load_learned_path: str = "",
trained_actors_path: str = "",
):
"""Load a scenario from a given path. Raises: ValueError: If the scenario or study case is not found.
Expand Down Expand Up @@ -304,12 +304,12 @@ async def load_scenario_folder_async(
)
learning_config["evaluation_mode"] = perform_evaluation

if "load_learned_path" not in learning_config.keys():
if load_learned_path:
learning_config["load_learned_path"] = load_learned_path
if "trained_actors_path" not in learning_config.keys():
if trained_actors_path:
learning_config["trained_actors_path"] = trained_actors_path
else:
learning_config[
"load_learned_path"
"trained_actors_path"
] = f"{inputs_path}/learned_strategies/{sim_id}"

if learning_config.get("learning_mode", False):
Expand Down Expand Up @@ -451,6 +451,13 @@ async def load_scenario_folder_async(
forecaster=forecaster,
)

if (
world.learning_mode
and world.learning_role is not None
and len(world.learning_role.rl_strats) == 0
):
raise ValueError("No RL units/strategies were provided!")


async def async_load_custom_units(
world: World,
Expand Down Expand Up @@ -523,9 +530,9 @@ def load_scenario_folder(
study_case: str,
perform_learning: bool = True,
perform_evaluation: bool = False,
episode: int = 0,
eval_episode: int = 0,
load_learned_path="",
episode: int = 1,
eval_episode: int = 1,
trained_actors_path="",
):
"""
Load a scenario from a given path.
Expand All @@ -549,7 +556,7 @@ def load_scenario_folder(
perform_evaluation=perform_evaluation,
episode=episode,
eval_episode=eval_episode,
load_learned_path=load_learned_path,
trained_actors_path=trained_actors_path,
)
)

Expand All @@ -574,7 +581,13 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
actors_and_critics = None
world.output_role.del_similar_runs()

validation_interval = min(
world.learning_role.training_episodes,
world.learning_config.get("validation_episodes_interval", 5),
)

eval_episode = 1

for episode in tqdm(
range(1, world.learning_role.training_episodes + 1),
desc="Training Episodes",
Expand All @@ -601,17 +614,16 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
world.learning_role.turn_off_initial_exploration()

world.run()

actors_and_critics = world.learning_role.extract_actors_and_critics()
validation_interval = min(
world.learning_role.training_episodes,
world.learning_config.get("validation_episodes_interval", 5),
)

if (
episode % validation_interval == 0
and episode > world.learning_role.episodes_collecting_initial_experience
):
old_path = world.learning_config["load_learned_path"]
old_path = world.learning_config["trained_actors_path"]
new_path = f"{old_path}_eval"

# save validation params in validation path
world.learning_role.save_params(directory=new_path)
world.reset()
Expand All @@ -625,14 +637,16 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
perform_learning=False,
perform_evaluation=True,
eval_episode=eval_episode,
load_learned_path=new_path,
trained_actors_path=new_path,
)

world.run()

avg_reward = world.output_role.get_sum_reward()
# check reward improvement in validation run
world.learning_config["load_learned_path"] = old_path
if best_reward < avg_reward:
# save new best params for simulation
world.learning_config["trained_actors_path"] = old_path
if avg_reward > best_reward:
# update best reward
best_reward = avg_reward
world.learning_role.save_params(directory=old_path)

Expand Down
6 changes: 5 additions & 1 deletion assume/common/units_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,15 @@ def handle_market_feedback(self, content: ClearingMessage, meta: dict[str, str])
:type meta: dict[str, str]
"""
logger.debug(f"{self.id} got market result: {content}")
orderbook: Orderbook = content["orderbook"]
accepted_orders: Orderbook = content["accepted_orders"]
rejected_orders: Orderbook = content["rejected_orders"]
orderbook = accepted_orders + rejected_orders

for order in orderbook:
order["market_id"] = content["market_id"]
# map bid id to unit id
order["unit_id"] = self.bids_map[order["bid_id"]]

self.valid_orders.extend(orderbook)
marketconfig = self.registered_markets[content["market_id"]]
self.set_unit_dispatch(orderbook, marketconfig)
Expand Down
14 changes: 7 additions & 7 deletions assume/markets/base_market.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,34 +344,34 @@ async def clear_market(self, market_products: list[MarketProduct]):
(
accepted_orderbook,
rejected_orderbook,
# pending_orderbook,
market_meta,
) = self.clear(self.all_orders, market_products)
self.all_orders = []
for order in rejected_orderbook:
order["accepted_volume"] = 0
order["accepted_price"] = 0
order["accepted_price"] = market_meta[0]["price"]
self.open_auctions - set(market_products)
# self.all_orders = pending_orderbook

accepted_orderbook.sort(key=itemgetter("agent_id"))
rejected_orderbook.sort(key=itemgetter("agent_id"))
accepted_bids = {

accepted_orders = {
agent: list(bids)
for agent, bids in groupby(accepted_orderbook, itemgetter("agent_id"))
}
rejected_bids = {
rejected_orders = {
agent: list(bids)
for agent, bids in groupby(rejected_orderbook, itemgetter("agent_id"))
}

for agent in self.registered_agents:
addr, aid = agent
meta = {"sender_addr": self.context.addr, "sender_id": self.context.aid}
closing: ClearingMessage = {
"context": "clearing",
"market_id": self.marketconfig.name,
"orderbook": accepted_bids.get(agent, []),
"rejected": rejected_bids.get(agent, []),
"accepted_orders": accepted_orders.get(agent, []),
"rejected_orders": rejected_orders.get(agent, []),
}
await self.context.send_acl_message(
closing,
Expand Down
59 changes: 26 additions & 33 deletions assume/markets/clearing_algorithms/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ def clear(
meta = []
orderbook.sort(key=market_getter)
for product, product_orders in groupby(orderbook, market_getter):
accepted_product_orders: Orderbook = []
accepted_demand_orders: Orderbook = []
accepted_supply_orders: Orderbook = []
product_orders = list(product_orders)
if product not in market_products:
rejected_orders.extend(product_orders)
Expand Down Expand Up @@ -89,15 +90,18 @@ def clear(
# now add the next demand order
dem_vol += -demand_order["volume"]
demand_order["accepted_volume"] = demand_order["volume"]
to_commit: Orderbook = []

# and add supply until the demand order is matched
while supply_orders and gen_vol < dem_vol:
supply_order = supply_orders.pop(0)
if supply_order["price"] <= demand_order["price"]:
added = supply_order["volume"] - supply_order.get(
"accepted_volume", 0
)
should_insert = not supply_order.get("accepted_volume")
supply_order["accepted_volume"] = supply_order["volume"]
to_commit.append(supply_order)
gen_vol += supply_order["volume"]
if should_insert:
accepted_supply_orders.append(supply_order)
gen_vol += added
else:
rejected_orders.append(supply_order)
# now we know which orders we need
Expand All @@ -107,47 +111,39 @@ def clear(

if diff < 0:
# gen < dem
# generation is not enough - split last demand bid
split_demand_order = demand_order.copy()
split_demand_order["accepted_volume"] = diff
# generation is not enough - accept partially
demand_order["accepted_volume"] = demand_order["volume"] - diff
rejected_orders.append(split_demand_order)
elif diff > 0:
# generation left over - split last generation bid
supply_order = to_commit[-1]
split_supply_order = supply_order.copy()
split_supply_order["volume"] = diff
# generation left over - accept generation bid partially
supply_order = accepted_supply_orders[-1]
supply_order["accepted_volume"] = supply_order["volume"] - diff

# changed supply_order is still part of to_commit and will be added
# only volume-diff can be sold for current price
gen_vol -= diff

# add left over to supply_orders again
supply_orders.insert(0, split_supply_order)
supply_orders.insert(0, supply_order)
demand_order["accepted_volume"] = demand_order["volume"]
else:
# diff == 0 perfect match
demand_order["accepted_volume"] = demand_order["volume"]

accepted_product_orders.append(demand_order)
accepted_product_orders.extend(to_commit)
accepted_demand_orders.append(demand_order)

for order in supply_orders:
rejected_orders.append(order)

# set clearing price - merit order - uniform pricing
accepted_supply_orders = [
x for x in accepted_product_orders if x["accepted_volume"] > 0
]
if accepted_supply_orders:
clear_price = max(map(itemgetter("price"), accepted_supply_orders))
clear_price = float(
max(map(itemgetter("price"), accepted_supply_orders))
)
else:
clear_price = 0

accepted_product_orders = accepted_demand_orders + accepted_supply_orders
for order in accepted_product_orders:
order["accepted_price"] = clear_price
accepted_demand_orders = [
x for x in accepted_product_orders if x["accepted_volume"] < 0
]
accepted_orders.extend(accepted_product_orders)

meta.append(
Expand Down Expand Up @@ -182,7 +178,8 @@ def clear(
meta = []
orderbook.sort(key=market_getter)
for product, product_orders in groupby(orderbook, market_getter):
accepted_product_orders: Orderbook = []
accepted_demand_orders: Orderbook = []
accepted_supply_orders: Orderbook = []
if product not in market_products:
rejected_orders.extend(product_orders)
# log.debug(f'found unwanted bids for {product} should be {market_products}')
Expand Down Expand Up @@ -242,27 +239,23 @@ def clear(
gen_vol -= diff

supply_orders.insert(0, split_supply_order)
demand_order["accepted_volume"] = demand_order["volume"]
else:
# diff == 0 perfect match
demand_order["accepted_volume"] = demand_order["volume"]

accepted_orders.append(demand_order)
accepted_demand_orders.append(demand_order)
# pay as bid
for supply_order in to_commit:
supply_order["accepted_price"] = supply_order["price"]

demand_order["accepted_price"] = supply_order["price"]
accepted_product_orders.extend(to_commit)
accepted_supply_orders.extend(to_commit)

for order in supply_orders:
rejected_orders.append(order)

accepted_supply_orders = [
x for x in accepted_product_orders if x["accepted_volume"] > 0
]
accepted_demand_orders = [
x for x in accepted_product_orders if x["accepted_volume"] < 0
]
accepted_product_orders = accepted_demand_orders + accepted_supply_orders

accepted_orders.extend(accepted_product_orders)
meta.append(
Expand Down
Loading

0 comments on commit 1bd92c8

Please sign in to comment.