Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adressing some major issues with learning #212

Merged
merged 2 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions assume/common/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,8 +783,8 @@ class LearningConfig(TypedDict):
:type noise_scale: int
:param noise_dt: Determines how quickly the noise weakens over time.
:type noise_dt: int
:param load_learned_path: The path to the learned model to load.
:type load_learned_path: str
:param trained_actors_path: The path to the learned model to load.
:type trained_actors_path: str
"""

observation_dimension: int
Expand All @@ -805,4 +805,4 @@ class LearningConfig(TypedDict):
noise_sigma: float
noise_scale: int
noise_dt: int
load_learned_path: str
trained_actors_path: str
4 changes: 2 additions & 2 deletions assume/common/forecasts.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,9 @@ def calculate_marginal_cost(self, pp_series: pd.Series):

fuel_cost = fuel_price / pp_series["efficiency"]
emissions_cost = co2_price * emission_factor / pp_series["efficiency"]
variable_cost = pp_series["var_cost"] if "var_cost" in pp_series else 0.0
fixed_cost = pp_series["fixed_cost"] if "fixed_cost" in pp_series else 0.0

marginal_cost = fuel_cost + emissions_cost + variable_cost
marginal_cost = fuel_cost + emissions_cost + fixed_cost

return marginal_cost

Expand Down
16 changes: 10 additions & 6 deletions assume/common/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,16 @@ def write_market_orders(self, market_orders, market_id):
# check if market results list is empty and skip the funktion and raise a warning
if not market_orders:
return

market_orders = separate_orders(market_orders)
df = pd.DataFrame.from_records(market_orders, index="start_time")

del df["only_hours"]
del df["agent_id"]

df["simulation"] = self.simulation_id
df["market_id"] = market_id

self.write_dfs["market_orders"].append(df)

def write_units_definition(self, unit_info: dict):
Expand Down Expand Up @@ -380,6 +384,8 @@ async def on_stop(self):

dfs.append(df)

# remove all empty dataframes
dfs = [df for df in dfs if not df.empty]
nick-harder marked this conversation as resolved.
Show resolved Hide resolved
if not dfs:
return

Expand All @@ -402,13 +408,11 @@ async def on_stop(self):

def get_sum_reward(self):
query = text(
"select value from kpis where variable = 'sum_reward' and ident = '{self.simulation_id}'"
f"select reward FROM rl_params where simulation='{self.simulation_id}'"
)

try:
with self.db.begin() as db:
avg_reward = db.execute(query).fetchall()[0]
except Exception:
avg_reward = 0
with self.db.begin() as db:
reward = db.execute(query).fetchall()
avg_reward = sum(r[0] for r in reward) / len(reward)

return avg_reward
50 changes: 32 additions & 18 deletions assume/common/scenario_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ async def load_scenario_folder_async(
perform_evaluation: bool = False,
episode: int = 0,
eval_episode: int = 0,
load_learned_path: str = "",
trained_actors_path: str = "",
):
"""Load a scenario from a given path. Raises: ValueError: If the scenario or study case is not found.

Expand Down Expand Up @@ -304,12 +304,12 @@ async def load_scenario_folder_async(
)
learning_config["evaluation_mode"] = perform_evaluation

if "load_learned_path" not in learning_config.keys():
if load_learned_path:
learning_config["load_learned_path"] = load_learned_path
if "trained_actors_path" not in learning_config.keys():
if trained_actors_path:
learning_config["trained_actors_path"] = trained_actors_path
else:
learning_config[
"load_learned_path"
"trained_actors_path"
] = f"{inputs_path}/learned_strategies/{sim_id}"

if learning_config.get("learning_mode", False):
Expand Down Expand Up @@ -451,6 +451,13 @@ async def load_scenario_folder_async(
forecaster=forecaster,
)

if (
world.learning_mode
and world.learning_role is not None
and len(world.learning_role.rl_strats) == 0
):
raise ValueError("No RL units/strategies were provided!")


async def async_load_custom_units(
world: World,
Expand Down Expand Up @@ -523,9 +530,9 @@ def load_scenario_folder(
study_case: str,
perform_learning: bool = True,
perform_evaluation: bool = False,
episode: int = 0,
eval_episode: int = 0,
load_learned_path="",
episode: int = 1,
eval_episode: int = 1,
trained_actors_path="",
):
"""
Load a scenario from a given path.
Expand All @@ -549,7 +556,7 @@ def load_scenario_folder(
perform_evaluation=perform_evaluation,
episode=episode,
eval_episode=eval_episode,
load_learned_path=load_learned_path,
trained_actors_path=trained_actors_path,
)
)

Expand All @@ -574,7 +581,13 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
actors_and_critics = None
world.output_role.del_similar_runs()

validation_interval = min(
world.learning_role.training_episodes,
world.learning_config.get("validation_episodes_interval", 5),
)

eval_episode = 1

for episode in tqdm(
range(1, world.learning_role.training_episodes + 1),
desc="Training Episodes",
Expand All @@ -601,17 +614,16 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
world.learning_role.turn_off_initial_exploration()

world.run()

actors_and_critics = world.learning_role.extract_actors_and_critics()
validation_interval = min(
world.learning_role.training_episodes,
world.learning_config.get("validation_episodes_interval", 5),
)

if (
episode % validation_interval == 0
and episode > world.learning_role.episodes_collecting_initial_experience
):
old_path = world.learning_config["load_learned_path"]
old_path = world.learning_config["trained_actors_path"]
new_path = f"{old_path}_eval"

# save validation params in validation path
world.learning_role.save_params(directory=new_path)
world.reset()
Expand All @@ -625,14 +637,16 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
perform_learning=False,
perform_evaluation=True,
eval_episode=eval_episode,
load_learned_path=new_path,
trained_actors_path=new_path,
)

world.run()

avg_reward = world.output_role.get_sum_reward()
# check reward improvement in validation run
world.learning_config["load_learned_path"] = old_path
if best_reward < avg_reward:
# save new best params for simulation
world.learning_config["trained_actors_path"] = old_path
if avg_reward > best_reward:
# update best reward
best_reward = avg_reward
world.learning_role.save_params(directory=old_path)

Expand Down
6 changes: 5 additions & 1 deletion assume/common/units_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,15 @@ def handle_market_feedback(self, content: ClearingMessage, meta: dict[str, str])
:type meta: dict[str, str]
"""
logger.debug(f"{self.id} got market result: {content}")
orderbook: Orderbook = content["orderbook"]
accepted_orders: Orderbook = content["accepted_orders"]
rejected_orders: Orderbook = content["rejected_orders"]
orderbook = accepted_orders + rejected_orders

for order in orderbook:
order["market_id"] = content["market_id"]
# map bid id to unit id
order["unit_id"] = self.bids_map[order["bid_id"]]

self.valid_orders.extend(orderbook)
marketconfig = self.registered_markets[content["market_id"]]
self.set_unit_dispatch(orderbook, marketconfig)
Expand Down
14 changes: 7 additions & 7 deletions assume/markets/base_market.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,34 +344,34 @@ async def clear_market(self, market_products: list[MarketProduct]):
(
accepted_orderbook,
rejected_orderbook,
# pending_orderbook,
market_meta,
) = self.clear(self.all_orders, market_products)
self.all_orders = []
for order in rejected_orderbook:
order["accepted_volume"] = 0
order["accepted_price"] = 0
order["accepted_price"] = market_meta[0]["price"]
self.open_auctions - set(market_products)
# self.all_orders = pending_orderbook

accepted_orderbook.sort(key=itemgetter("agent_id"))
rejected_orderbook.sort(key=itemgetter("agent_id"))
accepted_bids = {

accepted_orders = {
agent: list(bids)
for agent, bids in groupby(accepted_orderbook, itemgetter("agent_id"))
}
rejected_bids = {
rejected_orders = {
agent: list(bids)
for agent, bids in groupby(rejected_orderbook, itemgetter("agent_id"))
}

for agent in self.registered_agents:
addr, aid = agent
meta = {"sender_addr": self.context.addr, "sender_id": self.context.aid}
closing: ClearingMessage = {
"context": "clearing",
"market_id": self.marketconfig.name,
"orderbook": accepted_bids.get(agent, []),
"rejected": rejected_bids.get(agent, []),
"accepted_orders": accepted_orders.get(agent, []),
"rejected_orders": rejected_orders.get(agent, []),
}
await self.context.send_acl_message(
closing,
Expand Down
59 changes: 26 additions & 33 deletions assume/markets/clearing_algorithms/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ def clear(
meta = []
orderbook.sort(key=market_getter)
for product, product_orders in groupby(orderbook, market_getter):
accepted_product_orders: Orderbook = []
accepted_demand_orders: Orderbook = []
accepted_supply_orders: Orderbook = []
product_orders = list(product_orders)
if product not in market_products:
rejected_orders.extend(product_orders)
Expand Down Expand Up @@ -89,15 +90,18 @@ def clear(
# now add the next demand order
dem_vol += -demand_order["volume"]
demand_order["accepted_volume"] = demand_order["volume"]
to_commit: Orderbook = []

# and add supply until the demand order is matched
while supply_orders and gen_vol < dem_vol:
supply_order = supply_orders.pop(0)
if supply_order["price"] <= demand_order["price"]:
added = supply_order["volume"] - supply_order.get(
"accepted_volume", 0
)
should_insert = not supply_order.get("accepted_volume")
supply_order["accepted_volume"] = supply_order["volume"]
to_commit.append(supply_order)
gen_vol += supply_order["volume"]
if should_insert:
accepted_supply_orders.append(supply_order)
gen_vol += added
else:
rejected_orders.append(supply_order)
# now we know which orders we need
Expand All @@ -107,47 +111,39 @@ def clear(

if diff < 0:
# gen < dem
# generation is not enough - split last demand bid
split_demand_order = demand_order.copy()
split_demand_order["accepted_volume"] = diff
# generation is not enough - accept partially
demand_order["accepted_volume"] = demand_order["volume"] - diff
rejected_orders.append(split_demand_order)
elif diff > 0:
# generation left over - split last generation bid
supply_order = to_commit[-1]
split_supply_order = supply_order.copy()
split_supply_order["volume"] = diff
# generation left over - accept generation bid partially
supply_order = accepted_supply_orders[-1]
supply_order["accepted_volume"] = supply_order["volume"] - diff

# changed supply_order is still part of to_commit and will be added
# only volume-diff can be sold for current price
gen_vol -= diff

# add left over to supply_orders again
supply_orders.insert(0, split_supply_order)
nick-harder marked this conversation as resolved.
Show resolved Hide resolved
supply_orders.insert(0, supply_order)
demand_order["accepted_volume"] = demand_order["volume"]
else:
# diff == 0 perfect match
demand_order["accepted_volume"] = demand_order["volume"]

accepted_product_orders.append(demand_order)
accepted_product_orders.extend(to_commit)
accepted_demand_orders.append(demand_order)

for order in supply_orders:
rejected_orders.append(order)

# set clearing price - merit order - uniform pricing
accepted_supply_orders = [
x for x in accepted_product_orders if x["accepted_volume"] > 0
]
if accepted_supply_orders:
clear_price = max(map(itemgetter("price"), accepted_supply_orders))
clear_price = float(
max(map(itemgetter("price"), accepted_supply_orders))
)
else:
clear_price = 0

accepted_product_orders = accepted_demand_orders + accepted_supply_orders
for order in accepted_product_orders:
order["accepted_price"] = clear_price
accepted_demand_orders = [
x for x in accepted_product_orders if x["accepted_volume"] < 0
]
accepted_orders.extend(accepted_product_orders)

meta.append(
Expand Down Expand Up @@ -182,7 +178,8 @@ def clear(
meta = []
orderbook.sort(key=market_getter)
for product, product_orders in groupby(orderbook, market_getter):
accepted_product_orders: Orderbook = []
accepted_demand_orders: Orderbook = []
accepted_supply_orders: Orderbook = []
if product not in market_products:
rejected_orders.extend(product_orders)
# log.debug(f'found unwanted bids for {product} should be {market_products}')
Expand Down Expand Up @@ -242,27 +239,23 @@ def clear(
gen_vol -= diff

supply_orders.insert(0, split_supply_order)
demand_order["accepted_volume"] = demand_order["volume"]
else:
# diff == 0 perfect match
demand_order["accepted_volume"] = demand_order["volume"]

accepted_orders.append(demand_order)
accepted_demand_orders.append(demand_order)
# pay as bid
for supply_order in to_commit:
supply_order["accepted_price"] = supply_order["price"]

demand_order["accepted_price"] = supply_order["price"]
accepted_product_orders.extend(to_commit)
accepted_supply_orders.extend(to_commit)

for order in supply_orders:
rejected_orders.append(order)

accepted_supply_orders = [
x for x in accepted_product_orders if x["accepted_volume"] > 0
]
accepted_demand_orders = [
x for x in accepted_product_orders if x["accepted_volume"] < 0
]
accepted_product_orders = accepted_demand_orders + accepted_supply_orders

accepted_orders.extend(accepted_product_orders)
meta.append(
Expand Down
Loading