{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "04ea80c2",
   "metadata": {},
   "source": [
    "# Datafactory\n",
    "\n",
    "- [Load local files](#load-local-files)\n",
    "- [Load remote files](#load-remote-files)\n",
    "\n",
    "## Load local files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3848e9aa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>event_type</th>\n",
       "      <th>result</th>\n",
       "      <th>success</th>\n",
       "      <th>period_id</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>end_timestamp</th>\n",
       "      <th>ball_state</th>\n",
       "      <th>ball_owning_team</th>\n",
       "      <th>team_id</th>\n",
       "      <th>player_id</th>\n",
       "      <th>coordinates_x</th>\n",
       "      <th>coordinates_y</th>\n",
       "      <th>end_coordinates_x</th>\n",
       "      <th>end_coordinates_y</th>\n",
       "      <th>receiver_player_id</th>\n",
       "      <th>set_piece_type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22094464</td>\n",
       "      <td>PASS</td>\n",
       "      <td>COMPLETE</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>alive</td>\n",
       "      <td>279</td>\n",
       "      <td>279</td>\n",
       "      <td>40975</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.01</td>\n",
       "      <td>0.26</td>\n",
       "      <td>-0.05</td>\n",
       "      <td>93357</td>\n",
       "      <td>KICK_OFF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>22094465</td>\n",
       "      <td>PASS</td>\n",
       "      <td>COMPLETE</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>6.0</td>\n",
       "      <td>alive</td>\n",
       "      <td>279</td>\n",
       "      <td>279</td>\n",
       "      <td>93357</td>\n",
       "      <td>0.33</td>\n",
       "      <td>-0.13</td>\n",
       "      <td>0.38</td>\n",
       "      <td>-0.29</td>\n",
       "      <td>11458</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>22094466</td>\n",
       "      <td>PASS</td>\n",
       "      <td>COMPLETE</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>8.0</td>\n",
       "      <td>alive</td>\n",
       "      <td>279</td>\n",
       "      <td>279</td>\n",
       "      <td>11458</td>\n",
       "      <td>0.45</td>\n",
       "      <td>-0.49</td>\n",
       "      <td>0.44</td>\n",
       "      <td>-0.78</td>\n",
       "      <td>62686</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>22094467</td>\n",
       "      <td>PASS</td>\n",
       "      <td>COMPLETE</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>10.0</td>\n",
       "      <td>alive</td>\n",
       "      <td>279</td>\n",
       "      <td>279</td>\n",
       "      <td>62686</td>\n",
       "      <td>0.44</td>\n",
       "      <td>-0.85</td>\n",
       "      <td>0.79</td>\n",
       "      <td>-0.30</td>\n",
       "      <td>41488</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22094468</td>\n",
       "      <td>PASS</td>\n",
       "      <td>COMPLETE</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>16.0</td>\n",
       "      <td>alive</td>\n",
       "      <td>279</td>\n",
       "      <td>279</td>\n",
       "      <td>41488</td>\n",
       "      <td>0.76</td>\n",
       "      <td>-0.31</td>\n",
       "      <td>-0.25</td>\n",
       "      <td>-0.19</td>\n",
       "      <td>40975</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   event_id event_type    result success  period_id  timestamp  end_timestamp  \\\n",
       "0  22094464       PASS  COMPLETE    True          1          3            3.0   \n",
       "1  22094465       PASS  COMPLETE    True          1          6            6.0   \n",
       "2  22094466       PASS  COMPLETE    True          1          8            8.0   \n",
       "3  22094467       PASS  COMPLETE    True          1         10           10.0   \n",
       "4  22094468       PASS  COMPLETE    True          1         16           16.0   \n",
       "\n",
       "  ball_state ball_owning_team team_id player_id  coordinates_x  coordinates_y  \\\n",
       "0      alive              279     279     40975           0.01           0.01   \n",
       "1      alive              279     279     93357           0.33          -0.13   \n",
       "2      alive              279     279     11458           0.45          -0.49   \n",
       "3      alive              279     279     62686           0.44          -0.85   \n",
       "4      alive              279     279     41488           0.76          -0.31   \n",
       "\n",
       "   end_coordinates_x  end_coordinates_y receiver_player_id set_piece_type  \n",
       "0               0.26              -0.05              93357       KICK_OFF  \n",
       "1               0.38              -0.29              11458            NaN  \n",
       "2               0.44              -0.78              62686            NaN  \n",
       "3               0.79              -0.30              41488            NaN  \n",
       "4              -0.25              -0.19              40975            NaN  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from kloppy import datafactory\n",
    "\n",
    "dataset = datafactory.load(\n",
    "    event_data=\"../../kloppy/tests/files/datafactory_events.json\",\n",
    "    \n",
    "    # Optional arguments\n",
    "    coordinates=\"datafactory\",\n",
    "    event_types=[\"shot\", \"pass\"]\n",
    ")\n",
    "\n",
    "dataset.to_df().head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b03bd269",
   "metadata": {},
   "source": [
    "## Load remote files\n",
    "\n",
    "Kloppy supports remote files through `fsspec` under the hood. This allows you to work with files in AWS S3, Google Cloud, Azure Blob, HDFS, FTP, and SFTP without extra tools.\n",
    "For example, you can pass:\n",
    "- Individual S3 file paths (e.g. `event_data=\"s3://.../datafactory_events.json\"`)\n",
    "\n",
    "Note: Kloppy might throw an error the first time to help you identify missing cloud-specific dependencies like `s3fs`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71d27612",
   "metadata": {},
   "outputs": [],
   "source": [
    "from kloppy import datafactory\n",
    "\n",
    "dataset = datafactory.load(\n",
    "    event_data=\"s3://.../datafactory_events.json\",\n",
    "    \n",
    "    # Optional arguments\n",
    "    coordinates=\"datafactory\",\n",
    "    event_types=[\"shot\", \"pass\"]\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "kloppy-venv",
   "language": "python",
   "name": "kloppy-venv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}