Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[StatsPerform] Bugfixes for tracking data (MA25) + Support for event data (MA3) #310

Merged
merged 8 commits into from
Jun 7, 2024
Merged
882 changes: 806 additions & 76 deletions docs/getting-started/statsperform.ipynb

Large diffs are not rendered by default.

21 changes: 14 additions & 7 deletions kloppy/_providers/opta.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from kloppy.config import get_config
from kloppy.infra.serializers.event.opta import (
OptaDeserializer,
OptaInputs,
from kloppy.infra.serializers.event.statsperform import (
StatsPerformDeserializer,
StatsPerformInputs,
)
from kloppy.domain import EventDataset, Optional, List, EventFactory
from kloppy.io import open_as_file, FileLike
Expand All @@ -18,13 +18,13 @@ def load(
Load Opta event data into a [`EventDataset`][kloppy.domain.models.event.EventDataset]

Parameters:
f7_data: filename of json containing the events
f24_data: filename of json containing the lineup information
f7_data: F7 xml feed containing the lineup information
f24_data: F24 xml feed containing the events
event_types:
coordinates:
event_factory:
"""
deserializer = OptaDeserializer(
deserializer = StatsPerformDeserializer(
event_types=event_types,
coordinate_system=coordinates,
event_factory=event_factory or get_config("event_factory"),
Expand All @@ -33,5 +33,12 @@ def load(
f24_data
) as f24_data_fp:
return deserializer.deserialize(
inputs=OptaInputs(f7_data=f7_data_fp, f24_data=f24_data_fp),
inputs=StatsPerformInputs(
meta_data=f7_data_fp,
meta_feed="F7",
meta_datatype="XML",
event_data=f24_data_fp,
event_feed="F24",
event_datatype="XML",
),
)
125 changes: 118 additions & 7 deletions kloppy/_providers/statsperform.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,39 @@
from typing import Optional
"""Functions to load Stats Perform data."""

from kloppy.domain import TrackingDataset
from typing import List, Optional

from kloppy.config import get_config
from kloppy.domain import EventDataset, EventFactory, Provider, TrackingDataset
from kloppy.infra.serializers.event.statsperform import (
StatsPerformDeserializer as StatsPerformEventDeserializer,
)
from kloppy.infra.serializers.event.statsperform import (
StatsPerformInputs as StatsPerformEventInputs,
)
from kloppy.infra.serializers.tracking.statsperform import (
StatsPerformDeserializer as StatsPerformTrackingDeserializer,
)
from kloppy.infra.serializers.tracking.statsperform import (
StatsPerformDeserializer,
StatsPerformInputs,
StatsPerformInputs as StatsPerformTrackingInputs,
)
from kloppy.io import FileLike, open_as_file
from kloppy.utils import deprecated


@deprecated("statsperform.load_tracking should be used")
def load(
meta_data: FileLike, # Stats Perform MA1 file - xml or json - single game, live data & lineups
raw_data: FileLike, # Stats Perform MA25 file - txt - tracking data
tracking_system: str = "sportvu",
pitch_length: Optional[float] = None,
pitch_width: Optional[float] = None,
sample_rate: Optional[float] = None,
limit: Optional[int] = None,
coordinates: Optional[str] = None,
only_alive: Optional[bool] = False,
) -> TrackingDataset:
deserializer = StatsPerformDeserializer(
deserializer = StatsPerformTrackingDeserializer(
provider=Provider[tracking_system.upper()],
sample_rate=sample_rate,
limit=limit,
coordinate_system=coordinates,
Expand All @@ -26,7 +43,101 @@ def load(
raw_data
) as raw_data_fp:
return deserializer.deserialize(
inputs=StatsPerformInputs(
meta_data=meta_data_fp, raw_data=raw_data_fp
inputs=StatsPerformTrackingInputs(
meta_data=meta_data_fp,
raw_data=raw_data_fp,
pitch_length=pitch_length,
pitch_width=pitch_width,
)
)


def load_event(
    ma1_data: FileLike,
    ma3_data: FileLike,
    pitch_length: Optional[float] = None,
    pitch_width: Optional[float] = None,
    event_types: Optional[List[str]] = None,
    coordinates: Optional[str] = None,
    event_factory: Optional[EventFactory] = None,
) -> EventDataset:
    """Load Stats Perform event data from an MA1 and an MA3 feed.

    Args:
        ma1_data: MA1 json or xml feed containing the lineup information
        ma3_data: MA3 json or xml feed containing the events
        pitch_length: length of the pitch (in meters)
        pitch_width: width of the pitch (in meters)
        event_types: list of event types to load
        coordinates: coordinate system to use
        event_factory: a custom event factory; when not given, the
            "event_factory" entry of the kloppy config is used

    Returns:
        EventDataset: the loaded event data
    """
    # Fall back to the configured factory when the caller did not pass one.
    factory = event_factory or get_config("event_factory")  # type: ignore
    event_deserializer = StatsPerformEventDeserializer(
        event_types=event_types,
        coordinate_system=coordinates,
        event_factory=factory,
    )

    # Both feeds stay open for the whole deserialization.
    with open_as_file(ma1_data) as lineup_fp:
        with open_as_file(ma3_data) as events_fp:
            feed_inputs = StatsPerformEventInputs(
                meta_data=lineup_fp,
                meta_feed="MA1",
                event_data=events_fp,
                event_feed="MA3",
                pitch_length=pitch_length,
                pitch_width=pitch_width,
            )
            return event_deserializer.deserialize(inputs=feed_inputs)


def load_tracking(
    ma1_data: FileLike,
    ma25_data: FileLike,
    tracking_system: str = "sportvu",
    pitch_length: Optional[float] = None,
    pitch_width: Optional[float] = None,
    sample_rate: Optional[float] = None,
    limit: Optional[int] = None,
    coordinates: Optional[str] = None,
    only_alive: Optional[bool] = False,
) -> TrackingDataset:
    """Load Stats Perform tracking data from an MA1 and an MA25 feed.

    Args:
        ma1_data: json or xml feed containing the lineup information
        ma25_data: txt file linked in the MA25 Match Tracking Feed; also
            known as an OPT file
        tracking_system: system that generated the tracking data; the name
            is upper-cased and looked up as a member of ``Provider``
        pitch_length: length of the pitch (in meters)
        pitch_width: width of the pitch (in meters)
        sample_rate: sample the data at a specific rate
        limit: limit the number of frames loaded
        coordinates: coordinate system to use
        only_alive: only include frames in which the game is not paused

    Returns:
        TrackingDataset: the loaded tracking data
    """
    # Enum lookup by member name, e.g. "sportvu" -> Provider.SPORTVU.
    source_provider = Provider[tracking_system.upper()]
    tracking_deserializer = StatsPerformTrackingDeserializer(
        provider=source_provider,
        sample_rate=sample_rate,
        limit=limit,
        coordinate_system=coordinates,
        only_alive=only_alive,
    )

    # Both feeds stay open for the whole deserialization.
    with open_as_file(ma1_data) as lineup_fp:
        with open_as_file(ma25_data) as frames_fp:
            feed_inputs = StatsPerformTrackingInputs(
                meta_data=lineup_fp,
                raw_data=frames_fp,
                pitch_length=pitch_length,
                pitch_width=pitch_width,
            )
            return tracking_deserializer.deserialize(inputs=feed_inputs)
129 changes: 91 additions & 38 deletions kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class Provider(Enum):
KLOPPY:
DATAFACTORY:
STATSPERFORM:
SPORTVU:
"""

METRICA = "metrica"
Expand All @@ -109,6 +110,7 @@ class Provider(Enum):
KLOPPY = "kloppy"
DATAFACTORY = "datafactory"
STATSPERFORM = "statsperform"
SPORTVU = "sportvu"
OTHER = "other"

def __str__(self):
Expand Down Expand Up @@ -554,13 +556,26 @@ def vertical_orientation(self) -> VerticalOrientation:

@property
def pitch_dimensions(self) -> PitchDimensions:
return MetricPitchDimensions(
x_dim=Dimension(-1 * self.pitch_length / 2, self.pitch_length / 2),
y_dim=Dimension(-1 * self.pitch_width / 2, self.pitch_width / 2),
pitch_length=self.pitch_length,
pitch_width=self.pitch_width,
standardized=False,
).convert(to_unit=Unit.CENTIMETERS)
if self.pitch_length is not None and self.pitch_width is not None:
return MetricPitchDimensions(
x_dim=Dimension(
-1 * self.pitch_length / 2, self.pitch_length / 2
),
y_dim=Dimension(
-1 * self.pitch_width / 2, self.pitch_width / 2
),
pitch_length=self.pitch_length,
pitch_width=self.pitch_width,
standardized=False,
).convert(to_unit=Unit.CENTIMETERS)
else:
return MetricPitchDimensions(
x_dim=Dimension(None, None),
y_dim=Dimension(None, None),
pitch_length=None,
pitch_width=None,
standardized=False,
).convert(to_unit=Unit.CENTIMETERS)


@dataclass
Expand All @@ -579,13 +594,26 @@ def vertical_orientation(self) -> VerticalOrientation:

@property
def pitch_dimensions(self) -> PitchDimensions:
return MetricPitchDimensions(
x_dim=Dimension(-1 * self.pitch_length / 2, self.pitch_length / 2),
y_dim=Dimension(-1 * self.pitch_width / 2, self.pitch_width / 2),
pitch_length=self.pitch_length,
pitch_width=self.pitch_width,
standardized=False,
)
if self.pitch_length is not None and self.pitch_width is not None:
return MetricPitchDimensions(
x_dim=Dimension(
-1 * self.pitch_length / 2, self.pitch_length / 2
),
y_dim=Dimension(
-1 * self.pitch_width / 2, self.pitch_width / 2
),
pitch_length=self.pitch_length,
pitch_width=self.pitch_width,
standardized=False,
)
else:
return MetricPitchDimensions(
x_dim=Dimension(None, None),
y_dim=Dimension(None, None),
pitch_length=None,
pitch_width=None,
standardized=False,
)


@dataclass
Expand Down Expand Up @@ -650,13 +678,26 @@ def vertical_orientation(self) -> VerticalOrientation:

@property
def pitch_dimensions(self) -> PitchDimensions:
return MetricPitchDimensions(
x_dim=Dimension(-self.pitch_length / 2, self.pitch_length / 2),
y_dim=Dimension(-self.pitch_width / 2, self.pitch_width / 2),
pitch_length=self.pitch_length,
pitch_width=self.pitch_width,
standardized=False,
)
if self.pitch_length is not None and self.pitch_width is not None:
return MetricPitchDimensions(
x_dim=Dimension(
-1 * self.pitch_length / 2, self.pitch_length / 2
),
y_dim=Dimension(
-1 * self.pitch_width / 2, self.pitch_width / 2
),
pitch_length=self.pitch_length,
pitch_width=self.pitch_width,
standardized=False,
)
else:
return MetricPitchDimensions(
x_dim=Dimension(None, None),
y_dim=Dimension(None, None),
pitch_length=None,
pitch_width=None,
standardized=False,
)


@dataclass
Expand Down Expand Up @@ -720,13 +761,26 @@ def vertical_orientation(self) -> VerticalOrientation:

@property
def pitch_dimensions(self) -> PitchDimensions:
return MetricPitchDimensions(
x_dim=Dimension(-1 * self.pitch_length / 2, self.pitch_length / 2),
y_dim=Dimension(-1 * self.pitch_width / 2, self.pitch_width / 2),
pitch_length=self.pitch_length,
pitch_width=self.pitch_width,
standardized=False,
)
if self.pitch_length is not None and self.pitch_width is not None:
return MetricPitchDimensions(
x_dim=Dimension(
-1 * self.pitch_length / 2, self.pitch_length / 2
),
y_dim=Dimension(
-1 * self.pitch_width / 2, self.pitch_width / 2
),
pitch_length=self.pitch_length,
pitch_width=self.pitch_width,
standardized=False,
)
else:
return MetricPitchDimensions(
x_dim=Dimension(None, None),
y_dim=Dimension(None, None),
pitch_length=None,
pitch_width=None,
standardized=False,
)


@dataclass
Expand Down Expand Up @@ -764,10 +818,10 @@ def pitch_dimensions(self) -> PitchDimensions:


@dataclass
class StatsPerformCoordinateSystem(CoordinateSystem):
class SportVUCoordinateSystem(CoordinateSystem):
@property
def provider(self) -> Provider:
return Provider.STATSPERFORM
return Provider.SPORTVU

@property
def origin(self) -> Origin:
Expand All @@ -779,13 +833,12 @@ def vertical_orientation(self) -> VerticalOrientation:

@property
def pitch_dimensions(self) -> PitchDimensions:
# FIXME: This does not seem correct
return NormalizedPitchDimensions(
x_dim=Dimension(0, 100),
y_dim=Dimension(0, 100),
pitch_length=105,
pitch_width=68,
standardized=True,
return MetricPitchDimensions(
x_dim=Dimension(0, self.pitch_length),
y_dim=Dimension(0, self.pitch_width),
pitch_length=self.pitch_length,
pitch_width=self.pitch_width,
standardized=False,
)


Expand Down Expand Up @@ -838,7 +891,7 @@ def build_coordinate_system(
Provider.SKILLCORNER: SkillCornerCoordinateSystem,
Provider.DATAFACTORY: DatafactoryCoordinateSystem,
Provider.SECONDSPECTRUM: SecondSpectrumCoordinateSystem,
Provider.STATSPERFORM: StatsPerformCoordinateSystem,
Provider.SPORTVU: SportVUCoordinateSystem,
}

if provider in coordinate_systems:
Expand Down
Loading
Loading