Skip to content

Commit

Permalink
Merge pull request #264 from probberechts/fix/metrica-kickoff
Browse files Browse the repository at this point in the history
Remove duplicate Metrica set-piece events
  • Loading branch information
koenvo authored Dec 26, 2023
2 parents 897e9d0 + 8a86791 commit ea7f34e
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 56 deletions.
69 changes: 30 additions & 39 deletions kloppy/infra/serializers/event/metrica/json_deserializer.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,22 @@
from typing import Tuple, Dict, List, NamedTuple, IO
from typing import Dict, List, NamedTuple, IO, Optional
import logging
import json

from kloppy.domain import (
BallState,
BodyPart,
BodyPartQualifier,
CarryResult,
EventDataset,
Team,
PassResult,
Point,
BallState,
Provider,
PassEvent,
ShotEvent,
TakeOnEvent,
CarryEvent,
RecoveryEvent,
FoulCommittedEvent,
BallOutEvent,
GenericEvent,
PassResult,
Qualifier,
SetPieceQualifier,
SetPieceType,
ShotResult,
TakeOnResult,
CarryResult,
EventType,
SetPieceType,
SetPieceQualifier,
BodyPart,
BodyPartQualifier,
Qualifier,
Team,
)
from kloppy.infra.serializers.event.deserializer import EventDataDeserializer

Expand Down Expand Up @@ -93,10 +84,10 @@
OUT_EVENT_RESULTS = [PassResult.OUT]


def _parse_coordinates(event_start_or_end: dict) -> Point:
def _parse_coordinates(event_start_or_end: dict) -> Optional[Point]:
x = event_start_or_end["x"]
y = event_start_or_end["y"]
if x is None:
if x is None or y is None:
return None

return Point(
Expand All @@ -106,13 +97,11 @@ def _parse_coordinates(event_start_or_end: dict) -> Point:


def _parse_subtypes(event: dict) -> List:
if event["subtypes"]:
if isinstance(event["subtypes"], list):
return [subtype["id"] for subtype in event["subtypes"]]
else:
return [event["subtypes"]["id"]]
else:
return None
if event["subtypes"] is None:
return []
elif isinstance(event["subtypes"], list):
return [subtype["id"] for subtype in event["subtypes"]]
return [event["subtypes"]["id"]]


def _parse_pass(
Expand Down Expand Up @@ -209,7 +198,9 @@ def _parse_shot(event: Dict, previous_event: Dict, subtypes: List) -> Dict:
elif MS_SHOT_OUTCOME_GOAL in subtypes:
result = ShotResult.GOAL
else:
raise DeserializationError(f"Unknown shot outcome")
raise DeserializationError(
f"Unknown shot outcome: {', '.join(subtypes)}"
)

qualifiers = _get_event_qualifiers(event, previous_event, subtypes)

Expand All @@ -233,7 +224,7 @@ def _parse_take_on(subtypes: List) -> Dict:
return dict(result=result)


def _parse_ball_owning_team(event_type: int, team: Team) -> Team:
def _parse_ball_owning_team(event_type: int, team: Team) -> Optional[Team]:
if event_type not in [
MS_EVENT_TYPE_CHALLENGE,
MS_EVENT_TYPE_CARD,
Expand Down Expand Up @@ -269,7 +260,9 @@ def deserialize(self, inputs: MetricaJsonEventDataInputs) -> EventDataset:

with performance_logging("parse data", logger=logger):
events = []
for i, raw_event in enumerate(raw_events["data"]):
for previous_event, raw_event in zip(
[None] + raw_events["data"], raw_events["data"]
):
if raw_event["team"]["id"] == metadata.teams[0].team_id:
team = metadata.teams[0]
elif raw_event["team"]["id"] == metadata.teams[1].team_id:
Expand All @@ -287,7 +280,6 @@ def deserialize(self, inputs: MetricaJsonEventDataInputs) -> EventDataset:
for period in metadata.periods
if period.id == raw_event["period"]
][0]
previous_event = raw_events["data"][i - 1]

generic_event_kwargs = dict(
# from DataRecord
Expand All @@ -296,16 +288,17 @@ def deserialize(self, inputs: MetricaJsonEventDataInputs) -> EventDataset:
ball_owning_team=_parse_ball_owning_team(event_type, team),
ball_state=BallState.ALIVE,
# from Event
event_id=None,
event_id=str(raw_event["index"]),
team=team,
player=player,
coordinates=(_parse_coordinates(raw_event["start"])),
raw_event=raw_event,
)

iteration_events = []

if event_type in MS_PASS_TYPES:
if event_type == MS_SET_PIECE:
# set-piece events are integrated in the next pass or shot event
continue
elif event_type in MS_PASS_TYPES:
pass_event_kwargs = _parse_pass(
event=raw_event,
previous_event=previous_event,
Expand Down Expand Up @@ -338,9 +331,7 @@ def deserialize(self, inputs: MetricaJsonEventDataInputs) -> EventDataset:
)

elif event_type == MS_EVENT_TYPE_CARRY:
carry_event_kwargs = _parse_carry(
event=raw_event,
)
carry_event_kwargs = _parse_carry(event=raw_event)
event = self.event_factory.build_carry(
qualifiers=None,
**carry_event_kwargs,
Expand Down
2 changes: 1 addition & 1 deletion kloppy/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
import pytest


@pytest.fixture(scope="session")
def base_dir() -> Path:
    """Return the directory that contains this conftest module.

    The fixture is session-scoped: the path is derived from ``__file__``
    only, so it is the same for every test, and fixtures with a scope
    broader than "function" are allowed to depend on it.
    """
    return Path(__file__).parent
63 changes: 47 additions & 16 deletions kloppy/tests/test_metrica_events.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
import pytest

from kloppy import metrica
Expand All @@ -8,32 +9,36 @@
AttackingDirection,
SetPieceType,
BodyPart,
EventDataset,
Point,
SetPieceQualifier,
BodyPartQualifier,
)
from kloppy.domain.models.common import DatasetType


class TestMetricaEvents:
@pytest.fixture
def meta_data(self, base_dir) -> str:
return base_dir / "files/epts_metrica_metadata.xml"
"""Tests related to deserialization of Metrica JSON events."""

@pytest.fixture
def event_data(self, base_dir) -> str:
return base_dir / "files/metrica_events.json"

def test_correct_deserialization(self, event_data: str, meta_data: str):
@pytest.fixture(scope="class")
def dataset(self, base_dir: Path) -> EventDataset:
"""Load a Metrica event dataset."""
dataset = metrica.load_event(
event_data=event_data, meta_data=meta_data
# FIXME: these files represent different matches
event_data=base_dir / "files" / "metrica_events.json",
meta_data=base_dir / "files" / "epts_metrica_metadata.xml",
)
assert dataset.dataset_type == DatasetType.EVENT
assert len(dataset.events) == 3594
return dataset

def test_metadata(self, dataset: EventDataset):
"""It should parse the metadata correctly."""
assert dataset.metadata.provider == Provider.METRICA
assert dataset.dataset_type == DatasetType.EVENT
assert len(dataset.events) == 3684
assert len(dataset.metadata.periods) == 2
assert dataset.metadata.orientation is Orientation.HOME_TEAM
assert dataset.metadata.teams[0].name == "Team A"
assert dataset.metadata.teams[1].name == "Team B"

player = dataset.metadata.teams[0].players[10]
assert player.player_id == "Track_11"
assert player.jersey_no == 11
Expand All @@ -53,8 +58,34 @@ def test_correct_deserialization(self, event_data: str, meta_data: str):
attacking_direction=AttackingDirection.NOT_SET,
)

assert dataset.events[1].coordinates.x == 0.50125
def test_coordinates(self, dataset: EventDataset):
    """The first event should carry the expected start coordinates."""
    start = dataset.events[0].coordinates
    assert start == Point(x=0.50125, y=0.48725)

# Check the qualifiers
assert dataset.records[1].qualifiers[0].value == SetPieceType.KICK_OFF
assert dataset.records[100].qualifiers[0].value == BodyPart.HEAD
def test_body_part_qualifiers(self, dataset: EventDataset):
    """Headers get a body part qualifier; other events get none."""
    # Event "99" is a header, so its body part must be HEAD.
    headed_event = dataset.get_event_by_id("99")
    headed_body_part = headed_event.get_qualifier_value(BodyPartQualifier)
    assert headed_body_part == BodyPart.HEAD

    # Event "2" is not a header: a missing qualifier (None) means the
    # body part is unknown.
    other_event = dataset.get_event_by_id("2")
    other_body_part = other_event.get_qualifier_value(BodyPartQualifier)
    assert other_body_part is None

def test_set_piece(self, dataset: EventDataset):
    """Set-piece events should be folded into the event that follows them."""
    # (id of the raw set-piece event, id of the following event, expected type)
    cases = (
        ("1", "2", SetPieceType.KICK_OFF),  # followed by a pass
        ("130", "131", SetPieceType.FREE_KICK),  # followed by a shot
    )
    for set_piece_id, following_id, expected_type in cases:
        # The stand-alone set-piece event must not be in the dataset ...
        assert dataset.get_event_by_id(set_piece_id) is None
        # ... its type is reported as a qualifier on the next event instead.
        following_event = dataset.get_event_by_id(following_id)
        set_piece_type = following_event.get_qualifier_value(SetPieceQualifier)
        assert set_piece_type == expected_type

0 comments on commit ea7f34e

Please sign in to comment.