Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add artificial formation change event in Wyscout deserializer #250

Merged
merged 5 commits into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 98 additions & 1 deletion kloppy/infra/serializers/event/wyscout/deserializer_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@
TakeOnEvent,
TakeOnResult,
Team,
FormationType,
)
from kloppy.exceptions import DeserializationError
from kloppy.utils import performance_logging

from ..deserializer import EventDataDeserializer
Expand All @@ -52,6 +54,29 @@

INVALID_PLAYER = "0"

# Lookup table from formation strings to kloppy ``FormationType`` members.
# The keys are dash-separated line counts (e.g. "4-4-2"); only the formations
# listed here are recognized.
formations = {
"4-4-2": FormationType.FOUR_FOUR_TWO,
"4-4-1-1": FormationType.FOUR_FOUR_ONE_ONE,
"4-3-2-1": FormationType.FOUR_THREE_TWO_ONE,
"4-2-3-1": FormationType.FOUR_TWO_THREE_ONE,
"4-1-4-1": FormationType.FOUR_ONE_FOUR_ONE,
"4-1-3-2": FormationType.FOUR_ONE_THREE_TWO,
"4-3-1-2": FormationType.FOUR_THREE_ONE_TWO,
"4-3-3": FormationType.FOUR_THREE_THREE,
"4-5-1": FormationType.FOUR_FIVE_ONE,
"4-2-2-2": FormationType.FOUR_TWO_TWO_TWO,
"4-2-1-3": FormationType.FOUR_TWO_ONE_THREE,
"3-4-3": FormationType.THREE_FOUR_THREE,
"3-4-1-2": FormationType.THREE_FOUR_ONE_TWO,
"3-4-2-1": FormationType.THREE_FOUR_TWO_ONE,
"3-5-2": FormationType.THREE_FIVE_TWO,
"3-5-1-1": FormationType.THREE_FIVE_ONE_ONE,
"5-3-2": FormationType.FIVE_THREE_TWO,
"5-4-1": FormationType.FIVE_FOUR_ONE,
"3-3-3-1": FormationType.THREE_THREE_THREE_ONE,
"3-2-3-2": FormationType.THREE_TWO_THREE_TWO,
}


def _parse_team(raw_events, wyId: str, ground: Ground) -> Team:
team = Team(
Expand Down Expand Up @@ -363,6 +388,50 @@ def _parse_duel(raw_event: Dict) -> Dict:
return {"result": result, "qualifiers": qualifiers}


def get_home_away_team_formation(event, team):
    """Return the ``(home, away)`` formations recorded on a raw event.

    The raw event stores formations relative to the acting team
    (``event["team"]``) and its opponent (``event["opponentTeam"]``). This
    helper reorders the pair by ground, using ``team.ground``.

    Args:
        event: Raw event dict exposing ``["team"]["formation"]`` and
            ``["opponentTeam"]["formation"]``.
        team: The team that performed ``event``; its ``ground`` decides
            which side the acting team's formation belongs to.

    Returns:
        A tuple ``(home_team_formation, away_team_formation)`` whose items
        are values taken from the module-level ``formations`` table.

    Raises:
        DeserializationError: If ``team.ground`` is neither ``Ground.HOME``
            nor ``Ground.AWAY``, or if a formation string has no entry in
            ``formations``.
    """

    def _lookup(side):
        # Translate the formation string stored under ``event[side]``.
        raw_formation = event[side]["formation"]
        try:
            return formations[raw_formation]
        except KeyError as err:
            # Report unmapped formations as a deserialization problem
            # instead of leaking a bare KeyError from the lookup table.
            raise DeserializationError(
                f"Unknown formation {raw_formation!r} in event['{side}']"
            ) from err

    acting_team_is_home = team.ground == Ground.HOME
    if not acting_team_is_home and team.ground != Ground.AWAY:
        # Review note (@koenvo, Dec 8, 2023): "Great to see you used this
        # existing exception."
        raise DeserializationError(f"Unknown team_id {team.team_id}")

    # Acting team first, opponent second -- same lookup order for both grounds.
    acting_formation = _lookup("team")
    opponent_formation = _lookup("opponentTeam")

    if acting_team_is_home:
        return acting_formation, opponent_formation
    return opponent_formation, acting_formation


def identify_artificial_formation_change_event(
    raw_event, raw_next_event, teams, home_team, away_team
):
    """Detect formation changes between two consecutive raw events.

    The home/away formations attached to ``raw_event`` are compared with
    those attached to ``raw_next_event``.

    Args:
        raw_event: The current raw event dict.
        raw_next_event: The raw event dict that follows ``raw_event``.
        teams: Mapping from stringified team id to team object.
        home_team: Key used in the result for a home-side change.
        away_team: Key used in the result for an away-side change.

    Returns:
        A dict with one entry per team whose formation differs in the next
        event, shaped ``{team: {"formation_type": new_formation}}``. The
        dict is empty when neither formation changed.
    """
    # Resolve both acting teams before reading any formation.
    acting_team_now = teams[str(raw_event["team"]["id"])]
    acting_team_next = teams[str(raw_next_event["team"]["id"])]

    home_before, away_before = get_home_away_team_formation(
        raw_event, acting_team_now
    )
    home_after, away_after = get_home_away_team_formation(
        raw_next_event, acting_team_next
    )

    changes = {}
    for side_team, before, after in (
        (home_team, home_before, home_after),
        (away_team, away_before, away_after),
    ):
        if after != before:
            changes[side_team] = {"formation_type": after}
    return changes


def _players_to_dict(players: List[Player]):
    """Index ``players`` by their ``player_id``.

    When two players share an id, the one appearing later in ``players``
    wins.
    """
    players_by_id = {}
    for member in players:
        players_by_id[member.player_id] = member
    return players_by_id

Expand Down Expand Up @@ -547,12 +616,40 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
result=None,
qualifiers=_generic_qualifiers(raw_event),
event_name=raw_event["type"]["primary"],
**generic_event_args
**generic_event_args,
)

if event and self.should_include_event(event):
events.append(transformer.transform_event(event))

if next_event:
event_formation_change_info = (
identify_artificial_formation_change_event(
raw_event, next_event, teams, home_team, away_team
)
)
for (
formation_change_team,
formation_change_event_kwargs,
) in event_formation_change_info.items():
generic_event_args.update(
{
"event_id": None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to give this event an id. I checked the Metrica implementation which also creates synthetic events and it seems it's reusing the original event id.

The eventIds should be unique over a match. Maybe you can add a prefix to the original eventId and use that one?

And would be good to use the term 'synthetic' instead of 'artificial'.

"raw_event": None,
"coordinates": None,
"player": None,
"team": formation_change_team,
}
)
event = self.event_factory.build_formation_change(
result=None,
qualifiers=None,
**formation_change_event_kwargs,
**generic_event_args,
)
if event and self.should_include_event(event):
events.append(transformer.transform_event(event))

metadata = Metadata(
teams=[home_team, away_team],
periods=periods,
Expand Down
6 changes: 3 additions & 3 deletions kloppy/tests/files/wyscout_events_v3.json
Original file line number Diff line number Diff line change
Expand Up @@ -946,12 +946,12 @@
"y": 90
},
"team": {
"formation": "4-2-3-1",
"formation": "4-3-3",
"id": 3166,
"name": "Bologna"
},
"opponentTeam": {
"formation": "3-4-3",
"formation": "4-4-2",
"id": 3185,
"name": "Torino"
},
Expand Down Expand Up @@ -983,7 +983,7 @@
"y": 10
},
"team": {
"formation": "4-2-3-1",
"formation": "4-3-3",
"id": 3166,
"name": "Bologna"
},
Expand Down
4 changes: 4 additions & 0 deletions kloppy/tests/test_wyscout.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ def test_correct_v3_deserialization(self, event_v3_data: Path):
)
assert dataset.events[9].event_type == EventType.CLEARANCE
assert dataset.events[12].event_type == EventType.INTERCEPTION
assert (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great!

dataset.events[13].event_type == EventType.FORMATION_CHANGE
and dataset.events[14].event_type == EventType.FORMATION_CHANGE
)

def test_correct_normalized_v3_deserialization(self, event_v3_data: Path):
dataset = wyscout.load(event_data=event_v3_data, data_version="V3")
Expand Down