Skip to content

Helpers

Loading event data

All event load functions support options.

kloppy.helpers.load_statsbomb_event_data(event_data_filename, lineup_data_filename, options=None)

Load StatsBomb event data into an EventDataset

Parameters:

Name Type Description Default
event_data_filename str

filename of json containing the events

required
lineup_data_filename str

filename of json containing the lineup information

required
options dict None
Source code in kloppy/helpers.py
def load_statsbomb_event_data(
    event_data_filename: str, lineup_data_filename: str, options: dict = None
) -> EventDataset:
    """
    Read a StatsBomb match from disk and return it as an
    [`EventDataset`][kloppy.domain.models.event.EventDataset]

    Both files are opened in binary mode, handed to a `StatsBombSerializer`
    and closed again before this function returns.

    Parameters:
        event_data_filename: filename of json containing the events
        lineup_data_filename: filename of json containing the lineup information
        options: optional dict of loader options, forwarded unchanged to the
            serializer's `deserialize` call
    """
    statsbomb = StatsBombSerializer()
    with open(event_data_filename, "rb") as events_fp:
        with open(lineup_data_filename, "rb") as lineups_fp:
            # The keys are the input names the serializer is called with.
            sources = {"event_data": events_fp, "lineup_data": lineups_fp}
            return statsbomb.deserialize(inputs=sources, options=options)

kloppy.helpers.load_opta_event_data(f24_data_filename, f7_data_filename, options=None)

Load Opta event data into an EventDataset

Parameters:

Name Type Description Default
f24_data_filename str

filename of the f24 XML file containing the events

required
f7_data_filename str

filename of the f7 XML file containing metadata

required
options dict None
Source code in kloppy/helpers.py
def load_opta_event_data(
    f24_data_filename: str, f7_data_filename: str, options: dict = None
) -> EventDataset:
    """
    Read an Opta match (f24 + f7 feeds) from disk and return it as an
    [`EventDataset`][kloppy.domain.models.event.EventDataset]

    Both files are opened in binary mode, handed to an `OptaSerializer`
    and closed again before this function returns.

    Parameters:
        f24_data_filename: filename of the f24 XML file containing the events
        f7_data_filename: filename of the f7 XML file containing metadata
        options: optional dict of loader options, forwarded unchanged to the
            serializer's `deserialize` call
    """
    opta = OptaSerializer()
    with open(f24_data_filename, "rb") as f24_fp:
        with open(f7_data_filename, "rb") as f7_fp:
            # The keys are the input names the serializer is called with.
            sources = {"f24_data": f24_fp, "f7_data": f7_fp}
            return opta.deserialize(inputs=sources, options=options)

kloppy.helpers.load_metrica_json_event_data(raw_data_filename, metadata_filename, options=None)

Load Metrica event data into an EventDataset

Parameters:

Name Type Description Default
raw_data_filename str

filename of the json file containing the events

required
metadata_filename str

filename of the EPTS XML file containing metadata

required
options dict None
Source code in kloppy/helpers.py
def load_metrica_json_event_data(
    raw_data_filename: str, metadata_filename: str, options: dict = None
) -> EventDataset:
    """
    Read a Metrica match (json events + EPTS metadata) from disk and return
    it as an [`EventDataset`][kloppy.domain.models.event.EventDataset]

    Both files are opened in binary mode (metadata first), handed to a
    `MetricaEventsJsonSerializer` and closed again before this function
    returns.

    Parameters:
        raw_data_filename: filename of the json file containing the events
        metadata_filename: filename of the EPTS XML file containing metadata
        options: optional dict of loader options, forwarded unchanged to the
            serializer's `deserialize` call
    """
    metrica = MetricaEventsJsonSerializer()
    with open(metadata_filename, "rb") as metadata_fp:
        with open(raw_data_filename, "rb") as events_fp:
            # The keys are the input names the serializer is called with.
            sources = {"metadata": metadata_fp, "event_data": events_fp}
            return metrica.deserialize(inputs=sources, options=options)

kloppy.helpers.load_sportec_event_data(event_data_filename, match_data_filename, options=None)

Load Sportec event data into an EventDataset

Parameters:

Name Type Description Default
event_data_filename str

filename of the XML file containing the events

required
match_data_filename str

filename of the XML file containing the match information

required
options dict None
Source code in kloppy/helpers.py
def load_sportec_event_data(
    event_data_filename: str, match_data_filename: str, options: dict = None
) -> EventDataset:
    """
    Read a Sportec match from disk and return it as an
    [`EventDataset`][kloppy.domain.models.event.EventDataset]

    Both files are opened in binary mode, handed to a
    `SportecEventSerializer` and closed again before this function returns.

    Parameters:
        event_data_filename: filename of the XML file containing the events
        match_data_filename: filename of the XML file containing the match information
        options: optional dict of loader options, forwarded unchanged to the
            serializer's `deserialize` call
    """
    sportec = SportecEventSerializer()
    with open(event_data_filename, "rb") as events_fp:
        with open(match_data_filename, "rb") as match_fp:
            # The keys are the input names the serializer is called with.
            sources = {"event_data": events_fp, "match_data": match_fp}
            return sportec.deserialize(inputs=sources, options=options)

kloppy.helpers.load_wyscout_event_data(event_data_filename, options=None)

Load Wyscout event data into an EventDataset

Parameters:

Name Type Description Default
event_data_filename str

filename of the XML file containing the events and metadata

required
options dict None
Source code in kloppy/helpers.py
def load_wyscout_event_data(
    event_data_filename: str, options: dict = None
) -> EventDataset:
    """
    Read a Wyscout match from disk and return it as an
    [`EventDataset`][kloppy.domain.models.event.EventDataset]

    The file is opened in binary mode, handed to a `WyscoutSerializer`
    and closed again before this function returns.

    Parameters:
        event_data_filename: filename of the file containing the events and
            metadata (previously documented as XML; NOTE(review): confirm
            the actual file format)
        options: optional dict of loader options, forwarded unchanged to the
            serializer's `deserialize` call
    """
    wyscout = WyscoutSerializer()
    with open(event_data_filename, "rb") as events_fp:
        sources = {"event_data": events_fp}
        return wyscout.deserialize(inputs=sources, options=options)

Options

Option Type Description Default
event_types List[str] Only load events of these types into the dataset. By default, all types are loaded. See EventTypes. Pass the name of each EventType, for example ["PASS", "SHOT"]. None

Loading tracking data

Coming soon

Other helpers

kloppy.domain.services.state_builder.add_state(dataset, *builder_keys)

Add state to every event in the dataset

Parameters:

Name Type Description Default
builder_keys

One or more of: lineup, score, sequence

required

Examples:

>>> dataset = dataset.add_state('lineup', 'score')

Returns:

Type Description
EventDataset

EventDataset

Source code in kloppy/domain/services/state_builder/__init__.py
def add_state(dataset: EventDataset, *builder_keys: List[str]) -> EventDataset:
    """
    Add state to every event in the dataset

    One state builder is created per key. Each builder supplies an initial
    state, which is then updated around every event: `reduce_before` yields
    the state stored on the event, `reduce_after` yields the state carried
    over to the next event.

    Arguments:
        dataset: the dataset whose events get a `state` attached
        builder_keys: `lineup` `score` `sequence`; given either as separate
            arguments or as one single list

    Examples:
        >>> dataset = dataset.add_state('lineup', 'score')

    Returns:
        [`EventDataset`][kloppy.domain.models.event.EventDataset]
    """
    # Accept add_state(dataset, ['lineup', 'score']) as well as varargs.
    passed_single_list = len(builder_keys) == 1 and isinstance(
        builder_keys[0], list
    )
    keys = builder_keys[0] if passed_single_list else builder_keys

    builders = {}
    for key in keys:
        builders[key] = create_state_builder(key)

    carried = {
        key: builder.initial_state(dataset)
        for key, builder in builders.items()
    }

    enriched_events = []
    for event in dataset.events:
        # State as seen by this event.
        at_event = {
            key: builder.reduce_before(carried[key], event)
            for key, builder in builders.items()
        }
        enriched_events.append(replace(event, state=at_event))

        # State handed on to the next event.
        carried = {
            key: builder.reduce_after(at_event[key], event)
            for key, builder in builders.items()
        }

    return replace(dataset, records=enriched_events)

kloppy.helpers.to_pandas(dataset, _record_converter=None, additional_columns=None)

Convert Dataset to a pandas dataframe

Parameters:

Name Type Description Default
dataset Union[kloppy.domain.models.common.Dataset, List[kloppy.domain.models.common.DataRecord]]

Dataset to operate on. Don't pass this argument when you do dataset.to_pandas()

required
_record_converter Callable[[kloppy.domain.models.common.DataRecord], Dict]

Custom converter that turns a DataRecord into a Dict

None
additional_columns Dict[str, Union[Callable[[kloppy.domain.models.common.DataRecord], Any], Any]]

Additional columns to add to the dataframe

None

Examples:

>>> dataframe = dataset.to_pandas(additional_columns={
>>>    'player_name': lambda event: event.player.name
>>> })
Source code in kloppy/helpers.py
def to_pandas(
    dataset: Union[Dataset, List[DataRecord]],
    _record_converter: Callable[[DataRecord], Dict] = None,
    additional_columns: Dict[
        str, Union[Callable[[DataRecord], Any], Any]
    ] = None,
) -> "DataFrame":
    """
    Convert Dataset to a pandas dataframe

    Every record is turned into one row dict by a converter; the rows are
    then assembled with `DataFrame.from_records`. An input without records
    gives an empty dataframe.

    Arguments:
        dataset: Dataset to operate on. Don't pass this argument when you do dataset.to_pandas()
        _record_converter: Custom converter that turns a DataRecord into a Dict.
            When omitted, one is picked from the type of the dataset or of
            its first record.
        additional_columns: Additional columns to add to the dataframe. A
            callable value is called with the record to get the cell value;
            any other value is used as-is for every row.

    Examples:
        >>> dataframe = dataset.to_pandas(additional_columns={
        >>>    'player_name': lambda event: event.player.name
        >>> })

    Raises:
        Exception: when pandas is not installed, when `dataset` is neither a
            Dataset nor a list, or when no converter is known for the records.
    """
    try:
        import pandas as pd
    except ImportError:
        raise Exception(
            "Seems like you don't have pandas installed. Please"
            " install it using: pip install pandas"
        )

    if not isinstance(dataset, (Dataset, list)):
        raise Exception("Unknown dataset type")
    records = dataset.records if isinstance(dataset, Dataset) else dataset

    if not records:
        return pd.DataFrame()

    convert = _record_converter
    if not convert:
        # (dataset type, record type, converter), tried in this order.
        known_converters = (
            (TrackingDataset, Frame, _frame_to_pandas_row_converter),
            (EventDataset, Event, _event_to_pandas_row_converter),
            (CodeDataset, Code, _code_to_pandas_row_converter),
        )
        for dataset_cls, record_cls, candidate in known_converters:
            if isinstance(dataset, dataset_cls) or isinstance(
                records[0], record_cls
            ):
                convert = candidate
                break
        else:
            raise Exception("Don't know how to convert rows")

    def build_row(record: Union[Frame, Event]):
        row = convert(record)
        for column, spec in (additional_columns or {}).items():
            row.update({column: spec(record) if callable(spec) else spec})
        return row

    return pd.DataFrame.from_records(map(build_row, records))

kloppy.helpers.transform(dataset, to_orientation=None, to_pitch_dimensions=None)

Source code in kloppy/helpers.py
def transform(
    dataset: DatasetT, to_orientation=None, to_pitch_dimensions=None
) -> DatasetT:
    """
    Transform a dataset to another orientation and/or pitch dimensions

    The arguments are normalised and then passed to
    `Transformer.transform_dataset`, whose result is returned.

    Arguments:
        dataset: the dataset to transform
        to_orientation: target orientation. A non-empty string is looked up
            by name in `Orientation`; any other value is passed on untouched.
        to_pitch_dimensions: target pitch dimensions. A non-empty list or
            tuple is read as `[x_range, y_range]`, each range being unpacked
            into a `Dimension`; any other value is passed on untouched.
    """
    if to_orientation and isinstance(to_orientation, str):
        to_orientation = Orientation[to_orientation]

    if to_pitch_dimensions and isinstance(to_pitch_dimensions, (list, tuple)):
        x_dim = Dimension(*to_pitch_dimensions[0])
        y_dim = Dimension(*to_pitch_dimensions[1])
        to_pitch_dimensions = PitchDimensions(x_dim=x_dim, y_dim=y_dim)

    transformed = Transformer.transform_dataset(
        dataset=dataset,
        to_orientation=to_orientation,
        to_pitch_dimensions=to_pitch_dimensions,
    )
    return transformed