{ "cells": [ { "cell_type": "markdown", "id": "0999ea6f", "metadata": {}, "source": [ "# Wyscout\n", "\n", "- [Load local files](#load-local-files)\n", "- [Load remote files](#load-remote-files)\n", "- [Load Open Data](#load-open-data)\n", "\n", "## Load local files" ] }, { "cell_type": "code", "execution_count": 1, "id": "f1120143", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>event_id</th>\n", " <th>event_type</th>\n", " <th>result</th>\n", " <th>success</th>\n", " <th>period_id</th>\n", " <th>timestamp</th>\n", " <th>end_timestamp</th>\n", " <th>ball_state</th>\n", " <th>ball_owning_team</th>\n", " <th>team_id</th>\n", " <th>...</th>\n", " <th>coordinates_x</th>\n", " <th>coordinates_y</th>\n", " <th>end_coordinates_x</th>\n", " <th>end_coordinates_y</th>\n", " <th>receiver_player_id</th>\n", " <th>is_counter_attack</th>\n", " <th>pass_type</th>\n", " <th>set_piece_type</th>\n", " <th>body_part_type</th>\n", " <th>goalkeeper_action_type</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>190078343</td>\n", " <td>PASS</td>\n", " <td>COMPLETE</td>\n", " <td>True</td>\n", " <td>1</td>\n", " <td>2.643377</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>1612</td>\n", " <td>...</td>\n", " <td>50.0</td>\n", " <td>50.0</td>\n", " <td>29.0</td>\n", " <td>41.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>SIMPLE_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>190078344</td>\n", " <td>PASS</td>\n", " <td>INCOMPLETE</td>\n", " <td>False</td>\n", " 
<td>1</td>\n", " <td>4.350302</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>1612</td>\n", " <td>...</td>\n", " <td>29.0</td>\n", " <td>41.0</td>\n", " <td>71.0</td>\n", " <td>94.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>HIGH_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>190079073</td>\n", " <td>PASS</td>\n", " <td>INCOMPLETE</td>\n", " <td>False</td>\n", " <td>1</td>\n", " <td>8.010654</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>1631</td>\n", " <td>...</td>\n", " <td>29.0</td>\n", " <td>6.0</td>\n", " <td>36.0</td>\n", " <td>3.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>HEAD_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>190079074</td>\n", " <td>PASS</td>\n", " <td>COMPLETE</td>\n", " <td>True</td>\n", " <td>1</td>\n", " <td>14.934050</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>1631</td>\n", " <td>...</td>\n", " <td>39.0</td>\n", " <td>0.0</td>\n", " <td>41.0</td>\n", " <td>13.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>HAND_PASS</td>\n", " <td>THROW_IN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>190079076</td>\n", " <td>PASS</td>\n", " <td>COMPLETE</td>\n", " <td>True</td>\n", " <td>1</td>\n", " <td>16.013375</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>1631</td>\n", " <td>...</td>\n", " <td>41.0</td>\n", " <td>13.0</td>\n", " <td>63.0</td>\n", " <td>15.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>HIGH_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>5 rows × 21 columns</p>\n", "</div>" ], "text/plain": [ " event_id event_type result success period_id timestamp \\\n", "0 190078343 PASS COMPLETE True 1 2.643377 \n", "1 190078344 PASS 
INCOMPLETE False 1 4.350302 \n", "2 190079073 PASS INCOMPLETE False 1 8.010654 \n", "3 190079074 PASS COMPLETE True 1 14.934050 \n", "4 190079076 PASS COMPLETE True 1 16.013375 \n", "\n", " end_timestamp ball_state ball_owning_team team_id ... coordinates_x \\\n", "0 None None None 1612 ... 50.0 \n", "1 None None None 1612 ... 29.0 \n", "2 None None None 1631 ... 29.0 \n", "3 None None None 1631 ... 39.0 \n", "4 None None None 1631 ... 41.0 \n", "\n", " coordinates_y end_coordinates_x end_coordinates_y receiver_player_id \\\n", "0 50.0 29.0 41.0 NaN \n", "1 41.0 71.0 94.0 NaN \n", "2 6.0 36.0 3.0 NaN \n", "3 0.0 41.0 13.0 NaN \n", "4 13.0 63.0 15.0 NaN \n", "\n", " is_counter_attack pass_type set_piece_type body_part_type \\\n", "0 False SIMPLE_PASS NaN NaN \n", "1 False HIGH_PASS NaN NaN \n", "2 False HEAD_PASS NaN NaN \n", "3 False HAND_PASS THROW_IN NaN \n", "4 False HIGH_PASS NaN NaN \n", "\n", " goalkeeper_action_type \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from kloppy import wyscout\n", "\n", "dataset = wyscout.load(\n", " event_data=\"../../kloppy/tests/files/wyscout_events.json\",\n", " \n", " # Optional arguments\n", " event_types=[\"shot\", \"pass\"],\n", " coordinates=\"wyscout\"\n", ")\n", "\n", "dataset.to_df().head()" ] }, { "cell_type": "markdown", "id": "caa16b3e", "metadata": {}, "source": [ "## Load remote files\n", "\n", "You can also directly read files from urls (http or https) by passing a url instead of a local path." 
] }, { "cell_type": "code", "execution_count": 2, "id": "ada96b31", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>event_id</th>\n", " <th>event_type</th>\n", " <th>result</th>\n", " <th>success</th>\n", " <th>period_id</th>\n", " <th>timestamp</th>\n", " <th>end_timestamp</th>\n", " <th>ball_state</th>\n", " <th>ball_owning_team</th>\n", " <th>team_id</th>\n", " <th>...</th>\n", " <th>coordinates_x</th>\n", " <th>coordinates_y</th>\n", " <th>end_coordinates_x</th>\n", " <th>end_coordinates_y</th>\n", " <th>receiver_player_id</th>\n", " <th>is_counter_attack</th>\n", " <th>pass_type</th>\n", " <th>set_piece_type</th>\n", " <th>goalkeeper_action_type</th>\n", " <th>body_part_type</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>88178642</td>\n", " <td>PASS</td>\n", " <td>COMPLETE</td>\n", " <td>True</td>\n", " <td>1</td>\n", " <td>1.255990</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>50.0</td>\n", " <td>48.0</td>\n", " <td>47.0</td>\n", " <td>50.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>SIMPLE_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>88178643</td>\n", " <td>PASS</td>\n", " <td>COMPLETE</td>\n", " <td>True</td>\n", " <td>1</td>\n", " <td>2.351908</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>47.0</td>\n", " <td>50.0</td>\n", " <td>41.0</td>\n", " <td>48.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " 
<td>SIMPLE_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>88178644</td>\n", " <td>PASS</td>\n", " <td>COMPLETE</td>\n", " <td>True</td>\n", " <td>1</td>\n", " <td>3.241028</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>41.0</td>\n", " <td>48.0</td>\n", " <td>32.0</td>\n", " <td>35.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>SIMPLE_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>88178645</td>\n", " <td>PASS</td>\n", " <td>INCOMPLETE</td>\n", " <td>False</td>\n", " <td>1</td>\n", " <td>6.033681</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>32.0</td>\n", " <td>35.0</td>\n", " <td>89.0</td>\n", " <td>6.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>HIGH_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>88178648</td>\n", " <td>PASS</td>\n", " <td>INCOMPLETE</td>\n", " <td>False</td>\n", " <td>1</td>\n", " <td>27.053006</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>85.0</td>\n", " <td>0.0</td>\n", " <td>93.0</td>\n", " <td>16.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>HAND_PASS</td>\n", " <td>THROW_IN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>5 rows × 21 columns</p>\n", "</div>" ], "text/plain": [ " event_id event_type result success period_id timestamp \\\n", "0 88178642 PASS COMPLETE True 1 1.255990 \n", "1 88178643 PASS COMPLETE True 1 2.351908 \n", "2 88178644 PASS COMPLETE True 1 3.241028 \n", "3 88178645 PASS INCOMPLETE False 1 6.033681 \n", "4 88178648 PASS INCOMPLETE False 1 27.053006 \n", "\n", " end_timestamp ball_state ball_owning_team team_id ... 
coordinates_x \\\n", "0 None None None 4418 ... 50.0 \n", "1 None None None 4418 ... 47.0 \n", "2 None None None 4418 ... 41.0 \n", "3 None None None 4418 ... 32.0 \n", "4 None None None 4418 ... 85.0 \n", "\n", " coordinates_y end_coordinates_x end_coordinates_y receiver_player_id \\\n", "0 48.0 47.0 50.0 NaN \n", "1 50.0 41.0 48.0 NaN \n", "2 48.0 32.0 35.0 NaN \n", "3 35.0 89.0 6.0 NaN \n", "4 0.0 93.0 16.0 NaN \n", "\n", " is_counter_attack pass_type set_piece_type goalkeeper_action_type \\\n", "0 False SIMPLE_PASS NaN NaN \n", "1 False SIMPLE_PASS NaN NaN \n", "2 False SIMPLE_PASS NaN NaN \n", "3 False HIGH_PASS NaN NaN \n", "4 False HAND_PASS THROW_IN NaN \n", "\n", " body_part_type \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from kloppy import wyscout\n", "\n", "dataset = wyscout.load(\n", " event_data=\"https://raw.githubusercontent.com/koenvo/wyscout-soccer-match-event-dataset/main/processed/files/1694390.json\",\n", " \n", " # Optional arguments\n", " event_types=[\"shot\", \"pass\"],\n", " coordinates=\"wyscout\"\n", ")\n", "\n", "dataset.to_df().head()" ] }, { "cell_type": "markdown", "id": "a7cb2280", "metadata": {}, "source": [ "## Load open data\n", "\n", "For loading Wyscout open data you can also use `wyscout.load_open_data`. The API is very similar, but you pass a `match_id` instead of a URL.\n", "\n", "For more information on the available games please refer to [this GitHub repository](https://github.com/koenvo/wyscout-soccer-match-event-dataset) and have a look at [this table](https://github.com/koenvo/wyscout-soccer-match-event-dataset/blob/main/processed-v2/README.md)." 
] }, { "cell_type": "code", "execution_count": 3, "id": "8308124e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>event_id</th>\n", " <th>event_type</th>\n", " <th>result</th>\n", " <th>success</th>\n", " <th>period_id</th>\n", " <th>timestamp</th>\n", " <th>end_timestamp</th>\n", " <th>ball_state</th>\n", " <th>ball_owning_team</th>\n", " <th>team_id</th>\n", " <th>...</th>\n", " <th>coordinates_x</th>\n", " <th>coordinates_y</th>\n", " <th>end_coordinates_x</th>\n", " <th>end_coordinates_y</th>\n", " <th>receiver_player_id</th>\n", " <th>is_counter_attack</th>\n", " <th>pass_type</th>\n", " <th>set_piece_type</th>\n", " <th>goalkeeper_action_type</th>\n", " <th>body_part_type</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>88178642</td>\n", " <td>PASS</td>\n", " <td>COMPLETE</td>\n", " <td>True</td>\n", " <td>1</td>\n", " <td>1.255990</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>50.0</td>\n", " <td>48.0</td>\n", " <td>47.0</td>\n", " <td>50.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>SIMPLE_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>88178643</td>\n", " <td>PASS</td>\n", " <td>COMPLETE</td>\n", " <td>True</td>\n", " <td>1</td>\n", " <td>2.351908</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>47.0</td>\n", " <td>50.0</td>\n", " <td>41.0</td>\n", " <td>48.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " 
<td>SIMPLE_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>88178644</td>\n", " <td>PASS</td>\n", " <td>COMPLETE</td>\n", " <td>True</td>\n", " <td>1</td>\n", " <td>3.241028</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>41.0</td>\n", " <td>48.0</td>\n", " <td>32.0</td>\n", " <td>35.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>SIMPLE_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>88178645</td>\n", " <td>PASS</td>\n", " <td>INCOMPLETE</td>\n", " <td>False</td>\n", " <td>1</td>\n", " <td>6.033681</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>32.0</td>\n", " <td>35.0</td>\n", " <td>89.0</td>\n", " <td>6.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>HIGH_PASS</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>88178648</td>\n", " <td>PASS</td>\n", " <td>INCOMPLETE</td>\n", " <td>False</td>\n", " <td>1</td>\n", " <td>27.053006</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>None</td>\n", " <td>4418</td>\n", " <td>...</td>\n", " <td>85.0</td>\n", " <td>0.0</td>\n", " <td>93.0</td>\n", " <td>16.0</td>\n", " <td>NaN</td>\n", " <td>False</td>\n", " <td>HAND_PASS</td>\n", " <td>THROW_IN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>5 rows × 21 columns</p>\n", "</div>" ], "text/plain": [ " event_id event_type result success period_id timestamp \\\n", "0 88178642 PASS COMPLETE True 1 1.255990 \n", "1 88178643 PASS COMPLETE True 1 2.351908 \n", "2 88178644 PASS COMPLETE True 1 3.241028 \n", "3 88178645 PASS INCOMPLETE False 1 6.033681 \n", "4 88178648 PASS INCOMPLETE False 1 27.053006 \n", "\n", " end_timestamp ball_state ball_owning_team team_id ... 
coordinates_x \\\n", "0 None None None 4418 ... 50.0 \n", "1 None None None 4418 ... 47.0 \n", "2 None None None 4418 ... 41.0 \n", "3 None None None 4418 ... 32.0 \n", "4 None None None 4418 ... 85.0 \n", "\n", " coordinates_y end_coordinates_x end_coordinates_y receiver_player_id \\\n", "0 48.0 47.0 50.0 NaN \n", "1 50.0 41.0 48.0 NaN \n", "2 48.0 32.0 35.0 NaN \n", "3 35.0 89.0 6.0 NaN \n", "4 0.0 93.0 16.0 NaN \n", "\n", " is_counter_attack pass_type set_piece_type goalkeeper_action_type \\\n", "0 False SIMPLE_PASS NaN NaN \n", "1 False SIMPLE_PASS NaN NaN \n", "2 False SIMPLE_PASS NaN NaN \n", "3 False HIGH_PASS NaN NaN \n", "4 False HAND_PASS THROW_IN NaN \n", "\n", " body_part_type \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from kloppy import wyscout\n", "\n", "dataset = wyscout.load_open_data(\n", " match_id=1694390,\n", " \n", " # Optional arguments\n", " event_types=[\"shot\", \"pass\"],\n", " coordinates=\"wyscout\"\n", ")\n", "\n", "dataset.to_df().head()" ] } ], "metadata": { "kernelspec": { "display_name": "kloppy-venv", "language": "python", "name": "kloppy-venv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" } }, "nbformat": 4, "nbformat_minor": 5 }