{ "cells": [ { "cell_type": "markdown", "id": "eb7a4c5f", "metadata": {}, "source": [ "# SkillCorner\n", "\n", "- [Load local files](#load-local-files)\n", "- [Load remote open data files](#load-remote-open-data-files)\n", "- [Load remote files](#load-remote-files)\n", "\n", "## Load local files" ] }, { "cell_type": "code", "execution_count": 1, "id": "71e23535", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>period_id</th>\n", " <th>timestamp</th>\n", " <th>frame_id</th>\n", " <th>ball_state</th>\n", " <th>ball_owning_team_id</th>\n", " <th>ball_x</th>\n", " <th>ball_y</th>\n", " <th>ball_z</th>\n", " <th>home_22_x</th>\n", " <th>home_22_y</th>\n", " <th>...</th>\n", " <th>away_anon_304_d</th>\n", " <th>away_anon_304_s</th>\n", " <th>home_anon_368_x</th>\n", " <th>home_anon_368_y</th>\n", " <th>home_anon_368_d</th>\n", " <th>home_anon_368_s</th>\n", " <th>home_anon_423_x</th>\n", " <th>home_anon_423_y</th>\n", " <th>home_anon_423_d</th>\n", " <th>home_anon_423_s</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>12.0</td>\n", " <td>1531</td>\n", " <td>None</td>\n", " <td>NaN</td>\n", " <td>18.887029</td>\n", " <td>30.037350</td>\n", " <td>1.246213</td>\n", " <td>14.195641</td>\n", " <td>23.489013</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>13.0</td>\n", " <td>1541</td>\n", " <td>None</td>\n", 
" <td>NaN</td>\n", " <td>13.514701</td>\n", " <td>25.827027</td>\n", " <td>0.657040</td>\n", " <td>14.230466</td>\n", " <td>15.453939</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1</td>\n", " <td>14.0</td>\n", " <td>1551</td>\n", " <td>None</td>\n", " <td>NaN</td>\n", " <td>16.835040</td>\n", " <td>27.427233</td>\n", " <td>0.057516</td>\n", " <td>15.328696</td>\n", " <td>17.867260</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1</td>\n", " <td>15.0</td>\n", " <td>1561</td>\n", " <td>None</td>\n", " <td>NaN</td>\n", " <td>11.656880</td>\n", " <td>24.721404</td>\n", " <td>NaN</td>\n", " <td>13.642248</td>\n", " <td>22.210971</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1</td>\n", " <td>16.0</td>\n", " <td>1571</td>\n", " <td>None</td>\n", " <td>103.0</td>\n", " <td>7.426918</td>\n", " <td>30.572856</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>5 rows × 140 columns</p>\n", "</div>" ], "text/plain": [ " period_id timestamp frame_id ball_state ball_owning_team_id ball_x \\\n", "0 1 12.0 1531 None NaN 18.887029 \n", "1 1 13.0 1541 None NaN 13.514701 \n", "2 1 
14.0 1551 None NaN 16.835040 \n", "3 1 15.0 1561 None NaN 11.656880 \n", "4 1 16.0 1571 None 103.0 7.426918 \n", "\n", " ball_y ball_z home_22_x home_22_y ... away_anon_304_d \\\n", "0 30.037350 1.246213 14.195641 23.489013 ... NaN \n", "1 25.827027 0.657040 14.230466 15.453939 ... NaN \n", "2 27.427233 0.057516 15.328696 17.867260 ... NaN \n", "3 24.721404 NaN 13.642248 22.210971 ... NaN \n", "4 30.572856 NaN NaN NaN ... NaN \n", "\n", " away_anon_304_s home_anon_368_x home_anon_368_y home_anon_368_d \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " home_anon_368_s home_anon_423_x home_anon_423_y home_anon_423_d \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " home_anon_423_s \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", "[5 rows x 140 columns]" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from kloppy import skillcorner\n", "\n", "dataset = skillcorner.load(\n", " meta_data=\"../../kloppy/tests/files/skillcorner_match_data.json\",\n", " raw_data=\"../../kloppy/tests/files/skillcorner_structured_data.json\",\n", " \n", " # Optional arguments\n", " sample_rate=1/10,\n", " limit=100,\n", " coordinates=\"skillcorner\",\n", " include_empty_frames=False\n", ")\n", "\n", "dataset.to_df().head()" ] }, { "cell_type": "markdown", "id": "af4c1e42", "metadata": {}, "source": [ "## Load remote open data files\n", "\n", "SkillCorner has made a selection of 9 games available on their [GitHub](https://github.com/SkillCorner/opendata/tree/master/data). These games and their respective match_ids are listed below.\n", "\n", "| match_id | home_team | away_team | date_time |\n", "|------|------------------|------------------|-------------|\n", "| 4039 | Manchester City | Liverpool | 2020-07-02 |\n", "| 3749 | Dortmund | Bayern Munchen | 
2020-05-26 |\n", "| 3518 | Juventus | Inter | 2020-03-08 |\n", "| 3442 | Real Madrid | FC Barcelona | 2020-03-01 |\n", "| 2841 | FC Barcelona | Real Madrid | 2019-12-18 |\n", "| 2440 | Liverpool | Manchester City | 2019-11-10 |\n", "| 2417 | Bayern Munchen | Dortmund | 2019-11-09 |\n", "| 2269 | Paris | Marseille | 2019-10-27 |\n", "| 2068 | Inter | Juventus | 2019-10-06 |\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "bff4e3aa", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>period_id</th>\n", " <th>timestamp</th>\n", " <th>frame_id</th>\n", " <th>ball_state</th>\n", " <th>ball_owning_team_id</th>\n", " <th>ball_x</th>\n", " <th>ball_y</th>\n", " <th>ball_z</th>\n", " <th>home_47_x</th>\n", " <th>home_47_y</th>\n", " <th>...</th>\n", " <th>home_31_d</th>\n", " <th>home_31_s</th>\n", " <th>home_anon_471_x</th>\n", " <th>home_anon_471_y</th>\n", " <th>home_anon_471_d</th>\n", " <th>home_anon_471_s</th>\n", " <th>home_anon_501_x</th>\n", " <th>home_anon_501_y</th>\n", " <th>home_anon_501_d</th>\n", " <th>home_anon_501_s</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>1.0</td>\n", " <td>452</td>\n", " <td>None</td>\n", " <td>NaN</td>\n", " <td>14.693770</td>\n", " <td>-0.907936</td>\n", " <td>5.222056e-02</td>\n", " <td>-4.630710</td>\n", " <td>-20.740084</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " 
<td>1</td>\n", " <td>2.0</td>\n", " <td>462</td>\n", " <td>None</td>\n", " <td>NaN</td>\n", " <td>14.634828</td>\n", " <td>-0.030155</td>\n", " <td>-3.604550e-02</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1</td>\n", " <td>13.0</td>\n", " <td>572</td>\n", " <td>None</td>\n", " <td>40.0</td>\n", " <td>37.410470</td>\n", " <td>-19.993496</td>\n", " <td>-7.256100e-16</td>\n", " <td>19.580474</td>\n", " <td>-19.368974</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1</td>\n", " <td>14.0</td>\n", " <td>582</td>\n", " <td>None</td>\n", " <td>40.0</td>\n", " <td>42.215165</td>\n", " <td>-22.637840</td>\n", " <td>1.230652e-01</td>\n", " <td>26.785449</td>\n", " <td>-18.257435</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1</td>\n", " <td>15.0</td>\n", " <td>592</td>\n", " <td>None</td>\n", " <td>40.0</td>\n", " <td>42.433060</td>\n", " <td>-28.680917</td>\n", " <td>NaN</td>\n", " <td>31.769383</td>\n", " <td>-22.166959</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>5 rows × 112 columns</p>\n", "</div>" ], "text/plain": [ " period_id timestamp frame_id ball_state ball_owning_team_id 
ball_x \\\n", "0 1 1.0 452 None NaN 14.693770 \n", "1 1 2.0 462 None NaN 14.634828 \n", "2 1 13.0 572 None 40.0 37.410470 \n", "3 1 14.0 582 None 40.0 42.215165 \n", "4 1 15.0 592 None 40.0 42.433060 \n", "\n", " ball_y ball_z home_47_x home_47_y ... home_31_d home_31_s \\\n", "0 -0.907936 5.222056e-02 -4.630710 -20.740084 ... NaN NaN \n", "1 -0.030155 -3.604550e-02 NaN NaN ... NaN NaN \n", "2 -19.993496 -7.256100e-16 19.580474 -19.368974 ... NaN NaN \n", "3 -22.637840 1.230652e-01 26.785449 -18.257435 ... NaN NaN \n", "4 -28.680917 NaN 31.769383 -22.166959 ... NaN NaN \n", "\n", " home_anon_471_x home_anon_471_y home_anon_471_d home_anon_471_s \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " home_anon_501_x home_anon_501_y home_anon_501_d home_anon_501_s \n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", "[5 rows x 112 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from kloppy import skillcorner\n", "\n", "dataset = skillcorner.load(\n", " meta_data=\"https://raw.githubusercontent.com/SkillCorner/opendata/master/data/matches/4039/match_data.json\",\n", " raw_data=\"https://raw.githubusercontent.com/SkillCorner/opendata/master/data/matches/4039/structured_data.json\",\n", " \n", " # Optional arguments\n", " sample_rate=1/10,\n", " limit=100,\n", " coordinates=\"skillcorner\",\n", " include_empty_frames=False\n", ")\n", "\n", "dataset.to_df().head()" ] }, { "cell_type": "markdown", "id": "2f490d61", "metadata": {}, "source": [ "## Load remote files\n", "Kloppy supports remote files through `fsspec` FileSystem under the hood. 
This allows you to work with files in AWS S3, Google Cloud, Azure Blob, HDFS, FTP, and SFTP without extra tools.\n", "For example, you can pass:\n", "- Individual s3 file paths: (e.g. `meta_data=\"s3://.../skillcorner_match_data.jsonl\"`)\n", "\n", "Note: Kloppy might throw an error the first time to help you identify missing cloud-specific dependencies like `s3fs`. " ] }, { "cell_type": "code", "execution_count": null, "id": "797091f9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>period_id</th>\n", " <th>timestamp</th>\n", " <th>frame_id</th>\n", " <th>ball_state</th>\n", " <th>ball_owning_team_id</th>\n", " <th>ball_x</th>\n", " <th>ball_y</th>\n", " <th>ball_z</th>\n", " <th>home_47_x</th>\n", " <th>home_47_y</th>\n", " <th>...</th>\n", " <th>home_31_d</th>\n", " <th>home_31_s</th>\n", " <th>home_anon_471_x</th>\n", " <th>home_anon_471_y</th>\n", " <th>home_anon_471_d</th>\n", " <th>home_anon_471_s</th>\n", " <th>home_anon_501_x</th>\n", " <th>home_anon_501_y</th>\n", " <th>home_anon_501_d</th>\n", " <th>home_anon_501_s</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>1.0</td>\n", " <td>452</td>\n", " <td>None</td>\n", " <td>NaN</td>\n", " <td>14.693770</td>\n", " <td>-0.907936</td>\n", " <td>5.222056e-02</td>\n", " <td>-4.630710</td>\n", " <td>-20.740084</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", 
<td>2.0</td>\n", " <td>462</td>\n", " <td>None</td>\n", " <td>NaN</td>\n", " <td>14.634828</td>\n", " <td>-0.030155</td>\n", " <td>-3.604550e-02</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1</td>\n", " <td>13.0</td>\n", " <td>572</td>\n", " <td>None</td>\n", " <td>40.0</td>\n", " <td>37.410470</td>\n", " <td>-19.993496</td>\n", " <td>-7.256100e-16</td>\n", " <td>19.580474</td>\n", " <td>-19.368974</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1</td>\n", " <td>14.0</td>\n", " <td>582</td>\n", " <td>None</td>\n", " <td>40.0</td>\n", " <td>42.215165</td>\n", " <td>-22.637840</td>\n", " <td>1.230652e-01</td>\n", " <td>26.785449</td>\n", " <td>-18.257435</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1</td>\n", " <td>15.0</td>\n", " <td>592</td>\n", " <td>None</td>\n", " <td>40.0</td>\n", " <td>42.433060</td>\n", " <td>-28.680917</td>\n", " <td>NaN</td>\n", " <td>31.769383</td>\n", " <td>-22.166959</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>5 rows × 112 columns</p>\n", "</div>" ], "text/plain": [ " period_id timestamp frame_id ball_state ball_owning_team_id ball_x \\\n", "0 
1 1.0 452 None NaN 14.693770 \n", "1 1 2.0 462 None NaN 14.634828 \n", "2 1 13.0 572 None 40.0 37.410470 \n", "3 1 14.0 582 None 40.0 42.215165 \n", "4 1 15.0 592 None 40.0 42.433060 \n", "\n", " ball_y ball_z home_47_x home_47_y ... home_31_d home_31_s \\\n", "0 -0.907936 5.222056e-02 -4.630710 -20.740084 ... NaN NaN \n", "1 -0.030155 -3.604550e-02 NaN NaN ... NaN NaN \n", "2 -19.993496 -7.256100e-16 19.580474 -19.368974 ... NaN NaN \n", "3 -22.637840 1.230652e-01 26.785449 -18.257435 ... NaN NaN \n", "4 -28.680917 NaN 31.769383 -22.166959 ... NaN NaN \n", "\n", " home_anon_471_x home_anon_471_y home_anon_471_d home_anon_471_s \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " home_anon_501_x home_anon_501_y home_anon_501_d home_anon_501_s \n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", "[5 rows x 112 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from kloppy import skillcorner\n", "\n", "# `load` takes explicit meta_data/raw_data locations (local paths, URLs or fsspec paths such as s3://).\n", "dataset = skillcorner.load(\n", " meta_data=\"s3://.../skillcorner_match_data.jsonl\",\n", " raw_data=\"s3://.../skillcorner_structured_data.json\",\n", " \n", " # Optional arguments\n", " sample_rate=1/10,\n", " limit=100,\n", " coordinates=\"skillcorner\",\n", " include_empty_frames=False\n", ")\n", "\n", "dataset.to_df().head()" ] } ], "metadata": { "kernelspec": { "display_name": "kloppy-venv", "language": "python", "name": "kloppy-venv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" } }, "nbformat": 4, "nbformat_minor": 5 }