{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Parsing custom events"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from kloppy import statsbomb\n",
    "from kloppy.domain import ShotEvent, EventFactory, create_event, ShotEvent\n",
    "from dataclasses import dataclass\n",
    "from typing import Optional\n",
    "\n",
    "import polars as pl"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define a custom shot event class\n",
    "- Extends `ShotEvent` to include `xg` (expected goals).\n",
    "- Adds an `inverted_xg` property that returns `1 - xg`.\n",
    "- Overrides `__str__` to provide a readable string representation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dataclass(repr=False)\n",
    "class StatsbombShotEvent(ShotEvent):\n",
    "    \"\"\"A `ShotEvent` that additionally carries an expected-goals value.\n",
    "\n",
    "    Attributes:\n",
    "        xg: Expected goals of the shot, or `None` when not provided.\n",
    "    \"\"\"\n",
    "\n",
    "    xg: Optional[float] = None\n",
    "\n",
    "    @property\n",
    "    def inverted_xg(self) -> Optional[float]:\n",
    "        \"\"\"Return `1 - xg`, or `None` when `xg` is not set.\"\"\"\n",
    "        return None if self.xg is None else 1 - self.xg\n",
    "\n",
    "    def __str__(self):\n",
    "        \"\"\"Return a compact one-line summary of the shot.\"\"\"\n",
    "        return f\"<Shot event_id='{self.event_id}' time='{self.time}' player='{self.player}' xg={self.xg} result='{self.result}'>\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Implement a custom event factory\n",
    "- Extracts `xg` from `raw_event` if available.\n",
    "- Converts the `xg` value to `float`.\n",
    "- Uses `create_event` to construct a `StatsbombShotEvent` instance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class StatsbombEventFactory(EventFactory):\n",
    "    \"\"\"Event factory that builds `StatsbombShotEvent` instances for shots.\"\"\"\n",
    "\n",
    "    def build_shot(self, raw_event, **kwargs) -> ShotEvent:\n",
    "        \"\"\"Create a `StatsbombShotEvent` carrying the xG found in `raw_event`.\n",
    "\n",
    "        Args:\n",
    "            raw_event: Raw event mapping; xG is read from\n",
    "                `raw_event[\"shot\"][\"statsbomb_xg\"]` when present.\n",
    "            **kwargs: Remaining event attributes, forwarded to `create_event`.\n",
    "\n",
    "        Returns:\n",
    "            The shot event, with `xg` set to a float or `None`.\n",
    "        \"\"\"\n",
    "        # Use .get() at both levels so a missing or empty \"shot\" entry, or a\n",
    "        # shot without \"statsbomb_xg\", yields None instead of raising KeyError.\n",
    "        shot_data = raw_event.get(\"shot\") or {}\n",
    "        xg = shot_data.get(\"statsbomb_xg\")\n",
    "        if xg is not None:\n",
    "            xg = float(xg)\n",
    "\n",
    "        return create_event(\n",
    "            StatsbombShotEvent,\n",
    "            xg=xg,\n",
    "            raw_event=raw_event,\n",
    "            **kwargs,\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For the sake of this demonstration, we will load StatsBomb open event data and apply our custom `EventFactory`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/kloppy/kloppy/_providers/statsbomb.py:83: UserWarning: \n",
      "\n",
      "You are about to use StatsBomb public data.\n",
      "By using this data, you are agreeing to the user agreement. \n",
      "The user agreement can be found here: https://github.com/statsbomb/open-data/blob/master/LICENSE.pdf\n",
      "\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# Build the custom factory up front so the load call reads cleanly.\n",
    "shot_aware_factory = StatsbombEventFactory()\n",
    "\n",
    "dataset = statsbomb.load_open_data(\n",
    "    match_id=15946,\n",
    "    coordinates=\"statsbomb\",  # optional\n",
    "    event_factory=shot_aware_factory,  # optional: use our custom EventFactory\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Filter and print shots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<Shot event_id='becd7956-ce44-479e-8fc9-16a2d1f1f349' time='P1T02:29' player='Lionel Andrés Messi Cuccittini' xg=0.07699243 result='OFF_TARGET'>\n",
      "<Shot event_id='9107d374-2942-4876-a14f-1b9f86901c15' time='P1T05:40' player='Jordi Alba Ramos' xg=0.05166811 result='OFF_TARGET'>\n",
      "<Shot event_id='ddd194ca-08fb-43d0-87c2-33647f975f9f' time='P1T15:29' player='Lionel Andrés Messi Cuccittini' xg=0.016932096 result='SAVED'>\n",
      "<Shot event_id='86596ddb-d824-4e5e-b18c-b4442e9ce7cf' time='P1T16:20' player='Rubén Sobrino Pozuelo' xg=0.1226044 result='OFF_TARGET'>\n",
      "<Shot event_id='3ed2b107-be17-42d5-9d1b-25006a0e55cb' time='P1T18:16' player='Luis Alberto Suárez Díaz' xg=0.041750744 result='OFF_TARGET'>\n"
     ]
    }
   ],
   "source": [
    "# Keep only the shot events from the loaded dataset.\n",
    "shots = dataset.filter(\"shot\")\n",
    "\n",
    "# print() goes through StatsbombShotEvent.__str__, so xg shows up in the output.\n",
    "first_shots = shots[:5]\n",
    "for shot_event in first_shots:\n",
    "    print(shot_event)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Use the `inverted_xg` property"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.92300757\n",
      "0.94833189\n",
      "0.983067904\n",
      "0.8773956\n",
      "0.958249256\n"
     ]
    }
   ],
   "source": [
    "# `inverted_xg` is the extra property defined on StatsbombShotEvent.\n",
    "for shot_event in shots[:5]:\n",
    "    inverted = shot_event.inverted_xg\n",
    "    print(inverted)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert shots to a Polars DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (28, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>player_id</th><th>xg</th></tr><tr><td>str</td><td>f64</td></tr></thead><tbody><tr><td>&quot;5503&quot;</td><td>0.076992</td></tr><tr><td>&quot;5211&quot;</td><td>0.051668</td></tr><tr><td>&quot;5503&quot;</td><td>0.016932</td></tr><tr><td>&quot;6613&quot;</td><td>0.1226044</td></tr><tr><td>&quot;5246&quot;</td><td>0.041751</td></tr><tr><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;6935&quot;</td><td>0.140808</td></tr><tr><td>&quot;3501&quot;</td><td>0.334535</td></tr><tr><td>&quot;3501&quot;</td><td>0.105343</td></tr><tr><td>&quot;5503&quot;</td><td>0.09483</td></tr><tr><td>&quot;5503&quot;</td><td>0.420624</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (28, 2)\n",
       "┌───────────┬───────────┐\n",
       "│ player_id ┆ xg        │\n",
       "│ ---       ┆ ---       │\n",
       "│ str       ┆ f64       │\n",
       "╞═══════════╪═══════════╡\n",
       "│ 5503      ┆ 0.076992  │\n",
       "│ 5211      ┆ 0.051668  │\n",
       "│ 5503      ┆ 0.016932  │\n",
       "│ 6613      ┆ 0.1226044 │\n",
       "│ 5246      ┆ 0.041751  │\n",
       "│ …         ┆ …         │\n",
       "│ 6935      ┆ 0.140808  │\n",
       "│ 3501      ┆ 0.334535  │\n",
       "│ 3501      ┆ 0.105343  │\n",
       "│ 5503      ┆ 0.09483   │\n",
       "│ 5503      ┆ 0.420624  │\n",
       "└───────────┴───────────┘"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The custom `xg` attribute can be selected as a column by name.\n",
    "shots_df = shots.to_df(\"player_id\", \"xg\", engine=\"polars\")\n",
    "shots_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "/home/pieterr/Jupiter/Projects/kloppy",
   "language": "python",
   "name": "kloppy"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}