From 25045aa512c2d38dfca1217025f6cd5b9fcfd5ee Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sat, 20 Jan 2024 12:28:08 +0100 Subject: [PATCH 1/2] refactor(tests): Refactor Opta tests --- kloppy/tests/test_opta.py | 467 ++++++++++++++++++++++++++------------ 1 file changed, 320 insertions(+), 147 deletions(-) diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py index 59261a13..684fa2e9 100644 --- a/kloppy/tests/test_opta.py +++ b/kloppy/tests/test_opta.py @@ -1,197 +1,302 @@ import math +from datetime import datetime, timezone import pytest from kloppy.domain import ( AttackingDirection, - Period, - Orientation, - Provider, - Ground, - Point, + BallState, BodyPart, - SetPieceType, - PassType, - DatasetType, + BodyPartQualifier, + BodyPartQualifier, + CardQualifier, CardType, - FormationType, - GoalkeeperQualifier, - GoalkeeperActionType, + CounterAttackQualifier, + DatasetFlag, + DatasetType, + Dimension, DuelQualifier, DuelType, - ShotResult, - SetPieceQualifier, - CounterAttackQualifier, - BodyPartQualifier, - Point, - Point3D, -) - -from kloppy.domain.models.event import ( + EventDataset, EventType, + FormationType, + GoalkeeperActionType, + GoalkeeperQualifier, + Orientation, PassQualifier, - BodyPartQualifier, + PassType, + PitchDimensions, + Point, + Point, + Point3D, + Position, + Provider, + Score, + SetPieceQualifier, + SetPieceType, + ShotResult, + build_coordinate_system, ) - from kloppy import opta from kloppy.infra.serializers.event.opta.deserializer import ( _get_end_coordinates, + _parse_f24_datetime, ) -class TestOpta: - """""" +@pytest.fixture(scope="module") +def dataset(base_dir) -> EventDataset: + """Load Opta data for FC København - FC Nordsjælland""" + dataset = opta.load( + f7_data=base_dir / "files" / "opta_f7.xml", + f24_data=base_dir / "files" / "opta_f24.xml", + coordinates="opta", + ) + assert dataset.dataset_type == DatasetType.EVENT + return dataset - @pytest.fixture - def f24_data(self, base_dir) -> str: - return 
base_dir / "files/opta_f24.xml" - @pytest.fixture - def f7_data(self, base_dir) -> str: - return base_dir / "files/opta_f7.xml" +@pytest.mark.xfail +def test_parse_f24_datetime(): + """Test if the F24 datetime is correctly parsed""" + # timestamps have millisecond precision + assert ( + _parse_f24_datetime("2018-09-23T15:02:13.608") + == datetime( + 2018, 9, 23, 15, 2, 13, 608000, tzinfo=timezone.utc + ).timestamp() + ) + # milliseconds are not left-padded + assert ( + _parse_f24_datetime("2018-09-23T15:02:14.39") + == datetime( + 2018, 9, 23, 15, 2, 14, 39000, tzinfo=timezone.utc + ).timestamp() + ) - def test_correct_deserialization(self, f7_data: str, f24_data: str): - dataset = opta.load( - f24_data=f24_data, f7_data=f7_data, coordinates="opta" - ) + +class TestOptaMetadata: + """Tests related to deserializing metadata (i.e., the F7 feed)""" + + def test_provider(self, dataset): + """It should set the Opta provider""" assert dataset.metadata.provider == Provider.OPTA - assert dataset.dataset_type == DatasetType.EVENT - assert len(dataset.events) == 33 - assert len(dataset.metadata.periods) == 5 - assert ( - dataset.events[10].ball_owning_team == dataset.metadata.teams[1] - ) # 1594254267 - assert ( - dataset.events[15].ball_owning_team == dataset.metadata.teams[0] - ) # 2087733359 + + def test_orientation(self, dataset): + """It should set the action-executing-team orientation""" assert ( dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM ) + + def test_framerate(self, dataset): + """It should set the frame rate to None""" + assert dataset.metadata.frame_rate is None + + def test_teams(self, dataset): + """It should create the teams and player objects""" + # There should be two teams with the correct names and starting formations assert dataset.metadata.teams[0].name == "FC København" - assert dataset.metadata.teams[0].ground == Ground.HOME assert dataset.metadata.teams[0].starting_formation == FormationType( "4-4-2" ) assert 
dataset.metadata.teams[1].name == "FC Nordsjælland" - assert dataset.metadata.teams[1].ground == Ground.AWAY assert dataset.metadata.teams[1].starting_formation == FormationType( "4-3-3" ) - - player = dataset.metadata.teams[0].players[0] + # The teams should have the correct players + player = dataset.metadata.teams[0].get_player_by_id("111319") assert player.player_id == "111319" assert player.jersey_no == 21 assert str(player) == "Jesse Joronen" - assert player.position.position_id == "1" - assert player.position.name == "Goalkeeper" - - assert dataset.metadata.periods[0] == Period( - id=1, - start_timestamp=1537714933.608, - end_timestamp=1537717701.222, - attacking_direction=AttackingDirection.NOT_SET, + + def test_player_position(self, dataset): + """It should set the correct player position from the events""" + # Starting players have a position + player = dataset.metadata.teams[0].get_player_by_id("111319") + assert player.position == Position( + position_id="1", name="Goalkeeper", coordinates=None + ) + assert player.starting + + # Substituted players have a "Substitute" position + sub_player = dataset.metadata.teams[0].get_player_by_id("88022") + assert sub_player.position == Position( + position_id="0", name="Substitute", coordinates=None ) - assert dataset.metadata.periods[1] == Period( - id=2, - start_timestamp=1537718728.873, - end_timestamp=1537721737.788, - attacking_direction=AttackingDirection.NOT_SET, + assert not sub_player.starting + + def test_periods(self, dataset): + """It should create the periods""" + assert len(dataset.metadata.periods) == 5 + assert dataset.metadata.periods[0].id == 1 + period_starts = [ + _parse_f24_datetime("2018-09-23T15:02:13.608"), + _parse_f24_datetime("2018-09-23T16:05:28.873"), + _parse_f24_datetime("2018-09-23T17:50:01.810"), + _parse_f24_datetime("2018-09-23T18:35:01.810"), + _parse_f24_datetime("2018-09-23T19:05:01.810"), + ] + period_ends = [ + _parse_f24_datetime("2018-09-23T15:48:21.222"), + 
_parse_f24_datetime("2018-09-23T16:55:37.788"), + _parse_f24_datetime("2018-09-23T18:20:01.810"), + _parse_f24_datetime("2018-09-23T18:50:01.810"), + _parse_f24_datetime("2018-09-23T19:25:01.810"), + ] + for i, period in enumerate(dataset.metadata.periods): + assert period.id == i + 1 + assert period.start_timestamp == period_starts[i] + assert period.end_timestamp == period_ends[i] + assert period.attacking_direction == AttackingDirection.NOT_SET + + def test_pitch_dimensions(self, dataset): + """It should set the correct pitch dimensions""" + assert dataset.metadata.pitch_dimensions == PitchDimensions( + x_dim=Dimension(0, 100), y_dim=Dimension(0, 100) ) - assert dataset.metadata.periods[4] == Period( - id=5, - start_timestamp=1537729501.81, - end_timestamp=1537730701.81, - attacking_direction=AttackingDirection.NOT_SET, + + def test_coordinate_system(self, dataset): + """It should set the correct coordinate system""" + assert dataset.metadata.coordinate_system == build_coordinate_system( + Provider.OPTA, width=100, length=100 ) - assert dataset.events[0].coordinates == Point(50.1, 49.4) + @pytest.mark.xfail + def test_score(self, dataset): + """It should set the correct score""" + assert dataset.metadata.score == Score(home=2, away=1) - # Check the qualifiers - assert ( - dataset.events[0].qualifiers[0].value == SetPieceType.KICK_OFF - ) # 1510681159 - assert ( - BodyPartQualifier(value=BodyPart.HEAD) - in dataset.events[6].qualifiers - ) # 1101592119 - assert ( - PassQualifier(value=PassType.THROUGH_BALL) - in dataset.events[6].qualifiers - ) # 1101592119 + @pytest.mark.xfail + def test_flags(self, dataset): + """It should set the correct flags""" assert ( - dataset.events[5].qualifiers[0].value == PassType.CHIPPED_PASS - ) # 1444075194 - assert ( - dataset.events[19].qualifiers[0].value == CardType.RED - ) # 2318695229 - assert ( - dataset.events[21].event_type == EventType.CLEARANCE - ) # 2498907287 + dataset.metadata.flags + == DatasetFlag.BALL_OWNING_TEAM | 
DatasetFlag.BALL_STATE + ) - # Check receiver coordinates for incomplete passes - assert dataset.events[6].receiver_coordinates.x == 45.5 - assert dataset.events[6].receiver_coordinates.y == 68.2 - # Check timestamp from qualifier in case of goal - assert dataset.events[17].timestamp == 139.65200018882751 # 2318695229 - # assert dataset.events[17].coordinates_y == 12 +class TestOptaEvent: + """Generic tests related to deserializing events (i.e., the F24 feed)""" - # Check Own goal - assert dataset.events[18].result.value == "OWN_GOAL" # 2318697001 - # Check OFFSIDE pass has end_coordinates - assert dataset.events[20].receiver_coordinates.x == 89.3 # 2360555167 + def test_generic_attributes(self, dataset: EventDataset): + """Test generic event attributes""" + event = dataset.get_event_by_id("1510681159") + assert event.event_id == "1510681159" + assert event.team.name == "FC København" + assert event.ball_owning_team.name == "FC København" + assert event.player.full_name == "Dame N'Doye" + assert event.coordinates == Point(50.1, 49.4) + assert event.raw_event.attrib["id"] == "1510681159" + assert event.related_event_ids == [] + assert event.period.id == 1 + assert event.timestamp == ( + _parse_f24_datetime("2018-09-23T15:02:14.39") # event timestamp + - _parse_f24_datetime("2018-09-23T15:02:13.608") # period start + ) + assert event.ball_state == BallState.ALIVE - # Check goalkeeper qualifiers - assert ( - dataset.events[23].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.SAVE + def test_correct_normalized_deserialization(self, base_dir): + """Test if the normalized deserialization is correct""" + dataset = opta.load( + f7_data=base_dir / "files" / "opta_f7.xml", + f24_data=base_dir / "files" / "opta_f24.xml", ) + event = dataset.get_event_by_id("1510681159") + assert event.coordinates == Point(0.501, 0.506) + + def test_ball_owning_team(self, dataset: EventDataset): + """Test if the ball owning team is correctly set""" assert ( - 
dataset.events[24].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.CLAIM + dataset.get_event_by_id("1594254267").ball_owning_team + == dataset.metadata.teams[1] ) assert ( - dataset.events[25].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.PUNCH + dataset.get_event_by_id("2087733359").ball_owning_team + == dataset.metadata.teams[0] ) + + def test_setpiece_qualifiers(self, dataset: EventDataset): + """Test if the qualifiers are correctly deserialized""" + kick_off = dataset.get_event_by_id("1510681159") assert ( - dataset.events[26].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.PICK_UP + kick_off.get_qualifier_value(SetPieceQualifier) + == SetPieceType.KICK_OFF ) + + def test_body_part_qualifiers(self, dataset: EventDataset): + """Test if the body part qualifiers are correctly deserialized""" + header = dataset.get_event_by_id("1101592119") + assert BodyPart.HEAD in header.get_qualifier_values(BodyPartQualifier) + + def test_card_qualifiers(self, dataset: EventDataset): + """Test if the card qualifiers are correctly deserialized""" + red_card = dataset.get_event_by_id("2318454729") + assert red_card.get_qualifier_value(CardQualifier) == CardType.RED + + def test_counter_attack_qualifiers(self, dataset: EventDataset): + """Test if the counter attack qualifiers are correctly deserialized""" + counter_attack = dataset.get_event_by_id("2318695229") assert ( - dataset.events[27].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.SMOTHER + counter_attack.get_qualifier_value(CounterAttackQualifier) is True ) - assert ( - dataset.events[28].event_type == EventType.INTERCEPTION - ) # 2609934569 - assert ( - dataset.events[29].event_type == EventType.MISCONTROL - ) # 250913217 - # Check counterattack - assert ( - CounterAttackQualifier(value=True) in dataset.events[17].qualifiers - ) # 2318695229 - # Check DuelQualifiers - assert DuelType.AERIAL in dataset.events[7].get_qualifier_values( - 
DuelQualifier +class TestOptaPassEvent: + """Tests related to deserializing pass events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all pass events""" + events = dataset.find_all("pass") + assert len(events) == 14 + + def test_receiver_coordinates(self, dataset: EventDataset): + """Test if the receiver coordinates are correctly deserialized""" + # Check receiver coordinates for incomplete passes + incomplete_pass = dataset.get_event_by_id("1101592119") + assert incomplete_pass.receiver_coordinates.x == 45.5 + assert incomplete_pass.receiver_coordinates.y == 68.2 + + def test_end_coordinates(self, dataset: EventDataset): + """Test if the end coordinates are correctly deserialized""" + pass_event = dataset.get_event_by_id("2360555167") + assert pass_event.receiver_coordinates.x == 89.3 + + def test_pass_qualifiers(self, dataset: EventDataset): + """Test if the pass type qualifiers are correctly deserialized""" + through_ball = dataset.get_event_by_id("1101592119") + assert PassType.THROUGH_BALL in through_ball.get_qualifier_values( + PassQualifier ) - assert ( - dataset.events[8].get_qualifier_values(DuelQualifier)[1] - == DuelType.GROUND + chipped_pass = dataset.get_event_by_id("1444075194") + assert PassType.CHIPPED_PASS in chipped_pass.get_qualifier_values( + PassQualifier ) - def test_shot(self, f7_data: str, f24_data: str): - dataset = opta.load( - f24_data=f24_data, - f7_data=f7_data, - event_types=["shot"], - coordinates="opta", - ) - assert len(dataset.events) == 3 +class TestOptaClearanceEvent: + """Tests related to deserializing clearance events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all clearance events""" + events = dataset.find_all("clearance") + assert len(events) == 2 + + def test_correct_deserialization(self, dataset: EventDataset): + """Test if the clearance event is correctly deserialized""" + clearance = dataset.get_event_by_id("2498907287") + assert 
clearance.event_type == EventType.CLEARANCE + + +class TestOptaShotEvent: + """Tests related to deserializing shot events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all shot events""" + events = dataset.find_all("shot") + assert len(events) == 3 + + def test_correct_deserialization(self, dataset: EventDataset): + """Test if the shot event is correctly deserialized""" shot = dataset.get_event_by_id("2318695229") # A shot event should have a result assert shot.result == ShotResult.GOAL @@ -202,6 +307,14 @@ def test_shot(self, f7_data: str, f24_data: str): shot.get_qualifier_value(BodyPartQualifier) == BodyPart.LEFT_FOOT ) + def test_timestamp_goal(self, dataset: EventDataset): + """Check timestamp from qualifier in case of goal""" + goal = dataset.get_event_by_id("2318695229") + assert goal.timestamp == ( + _parse_f24_datetime("2018-09-23T16:07:48.525") # event timestamp + - _parse_f24_datetime("2018-09-23T16:05:28.873") # period start + ) + def test_shot_end_coordinates(self): """Shots should receive the correct end coordinates.""" # When no end coordinates are available, we return None @@ -276,24 +389,84 @@ def test_shot_end_coordinates(self): blocked_shot_on_target_qualifiers, start_coordinates ) == Point3D(x=99.1, y=52.5, z=opp_small) - def test_own_goal(self, f7_data: str, f24_data: str): - dataset = opta.load( - f24_data=f24_data, - f7_data=f7_data, - event_types=["shot"], - coordinates="opta", - ) - + def test_own_goal(self, dataset: EventDataset): + """Test if own goals are correctly deserialized""" own_goal = dataset.get_event_by_id("2318697001") assert own_goal.result == ShotResult.OWN_GOAL # Use the inverse coordinates of the goal location assert own_goal.result_coordinates == Point3D(0.0, 100 - 45.6, 1.9) - def test_correct_normalized_deserialization( - self, f7_data: str, f24_data: str - ): - dataset = opta.load( - f24_data=f24_data, - f7_data=f7_data, + +class TestOptaDuelEvent: + """Tests related to 
deserializing duel events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all duel events""" + events = dataset.find_all("duel") + assert len(events) == 3 + + def test_qualifiers(self, dataset: EventDataset): + """Test if the qualifiers are correctly deserialized""" + aerial_duel = dataset.get_event_by_id("1274474573") + assert DuelType.AERIAL in aerial_duel.get_qualifier_values( + DuelQualifier + ) + ground_duel = dataset.get_event_by_id("2140914735") + assert DuelType.GROUND in ground_duel.get_qualifier_values( + DuelQualifier + ) + + +class TestOptaGoalkeeperEvent: + """Tests related to deserializing goalkeeper events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all goalkeeper events""" + events = dataset.find_all("goalkeeper") + assert len(events) == 5 + + def test_qualifiers(self, dataset: EventDataset): + """Test if the qualifiers are correctly deserialized""" + save = dataset.get_event_by_id("2451170467") + assert ( + save.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.SAVE + ) + claim = dataset.get_event_by_id("2453149143") + assert ( + claim.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.CLAIM + ) + punch = dataset.get_event_by_id("2451094707") + assert ( + punch.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.PUNCH + ) + keeper_pick_up = dataset.get_event_by_id("2451098837") + assert ( + keeper_pick_up.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.PICK_UP + ) + smother = dataset.get_event_by_id("2438594253") + assert ( + smother.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.SMOTHER ) - assert dataset.events[0].coordinates == Point(0.501, 0.506) + + +class TestOptaInterceptionEvent: + """Tests related to deserializing interception events""" + + def test_correct_deserialization(self, dataset: EventDataset): + """Test if the interception event is correctly deserialized""" + 
event = dataset.get_event_by_id("2609934569") + assert event.event_type == EventType.INTERCEPTION + + +class TestOptaMiscontrolEvent: + """Tests related to deserializing miscontrol events""" + + def test_correct_deserialization(self, dataset: EventDataset): + """Test if the miscontrol event is correctly deserialized""" + event = dataset.get_event_by_id("2509132175") + assert event.event_type == EventType.MISCONTROL From 25021d9fe8eb0a588a287d07695e5030a9c7d3f1 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sat, 20 Jan 2024 12:53:36 +0100 Subject: [PATCH 2/2] fix(opta): Fix Opta F24 timestamp parsing Opta does not zero-pad milliseconds. Therefore, they were incorrectly parsed by Python's default "%f" format code. See also #267 --- kloppy/infra/serializers/event/opta/deserializer.py | 5 +++++ kloppy/tests/test_opta.py | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/kloppy/infra/serializers/event/opta/deserializer.py b/kloppy/infra/serializers/event/opta/deserializer.py index 01a81e25..e2bbce35 100644 --- a/kloppy/infra/serializers/event/opta/deserializer.py +++ b/kloppy/infra/serializers/event/opta/deserializer.py @@ -246,6 +246,11 @@ def _parse_f24_datetime(dt_str: str) -> float: + def zero_pad_milliseconds(timestamp): + parts = timestamp.split(".") + return ".".join(parts[:-1] + ["{:03d}".format(int(parts[-1]))]) + + dt_str = zero_pad_milliseconds(dt_str) return ( datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f") .replace(tzinfo=pytz.utc) diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py index 684fa2e9..f4be28a4 100644 --- a/kloppy/tests/test_opta.py +++ b/kloppy/tests/test_opta.py @@ -56,7 +56,6 @@ def dataset(base_dir) -> EventDataset: return dataset -@pytest.mark.xfail def test_parse_f24_datetime(): """Test if the F24 datetime is correctly parsed""" # timestamps have millisecond precision