Adapter¶
Since version 3.3, kloppy uses adapters to load data from external sources. kloppy ships with support for http and s3, but you can add your own adapters to support other external sources.
S3¶
kloppy uses s3fs to access files on s3. If preferred, you can create an s3fs.S3FileSystem instance and pass it via set_config.
In [ ]:
Copied!
import s3fs
from kloppy import statsbomb
from kloppy.config import set_config
# Create the s3fs filesystem instance kloppy should use (anon=True: anonymous connection)
file_system = s3fs.S3FileSystem(anon=True)
# Hand the instance to kloppy under the 'adapters.s3.s3fs' config key
set_config(
'adapters.s3.s3fs',
file_system
)
# This will fail because we don't have access to 'some-bucket'
dataset = statsbomb.load(
event_data='s3://some-bucket/1234/events.json',
lineup_data='s3://some-bucket/1234/lineup.json'
)
import s3fs
from kloppy import statsbomb
from kloppy.config import set_config
# Create the s3fs filesystem instance kloppy should use (anon=True: anonymous connection)
file_system = s3fs.S3FileSystem(anon=True)
# Hand the instance to kloppy under the 'adapters.s3.s3fs' config key
set_config(
'adapters.s3.s3fs',
file_system
)
# This will fail because we don't have access to 'some-bucket'
dataset = statsbomb.load(
event_data='s3://some-bucket/1234/events.json',
lineup_data='s3://some-bucket/1234/lineup.json'
)
Custom adapter - database¶
It's possible to create your own adapter, for example a database adapter.
First, create a table within a sqlite database to hold the file content.
In [45]:
Copied!
import sqlite3
# Set up a table that holds the raw file content: one row per (match_id, file_type)
con = sqlite3.connect('database.db')
# IF NOT EXISTS keeps this cell safe to rerun without swallowing unrelated
# sqlite3.OperationalError failures (locked database, disk I/O error, SQL typo, ...)
con.execute(
    "CREATE TABLE IF NOT EXISTS files("
    "match_id INT, file_type TEXT, data TEXT, UNIQUE(match_id, file_type))"
)
con.commit()
# Load some open data into the database
import requests
def load_into_table(match_id):
    """Download the StatsBomb open-data files for one match and store them.

    Fetches the events and lineup JSON for ``match_id`` and inserts each as a
    row in the ``files`` table, using file types ``'events'`` and ``'lineup'``.

    Raises:
        requests.HTTPError: if either download returns an error status.
        sqlite3.IntegrityError: if a row for this match and file type already
            exists (UNIQUE(match_id, file_type) constraint).
    """
    base_url = "https://raw.githubusercontent.com/statsbomb/open-data/master/data"

    # Download both files before touching the database, and fail loudly on
    # 404/500 instead of storing the error page as match data.
    events_data = requests.get(f"{base_url}/events/{match_id}.json")
    events_data.raise_for_status()
    lineup_data = requests.get(f"{base_url}/lineups/{match_id}.json")
    lineup_data.raise_for_status()

    # The connection context manager commits both rows together, or rolls
    # back if an insert fails, so a match is never stored half-complete.
    with con:
        con.executemany(
            "INSERT INTO files values(?, ?, ?)",
            [
                (match_id, 'events', events_data.content),
                (match_id, 'lineup', lineup_data.content),
            ],
        )
import sqlite3
# Set up a table that holds the raw file content: one row per (match_id, file_type)
con = sqlite3.connect('database.db')
# IF NOT EXISTS keeps this cell safe to rerun without swallowing unrelated
# sqlite3.OperationalError failures (locked database, disk I/O error, SQL typo, ...)
con.execute(
    "CREATE TABLE IF NOT EXISTS files("
    "match_id INT, file_type TEXT, data TEXT, UNIQUE(match_id, file_type))"
)
con.commit()
# Load some open data into the database
import requests
def load_into_table(match_id):
    """Download the StatsBomb open-data files for one match and store them.

    Fetches the events and lineup JSON for ``match_id`` and inserts each as a
    row in the ``files`` table, using file types ``'events'`` and ``'lineup'``.

    Raises:
        requests.HTTPError: if either download returns an error status.
        sqlite3.IntegrityError: if a row for this match and file type already
            exists (UNIQUE(match_id, file_type) constraint).
    """
    base_url = "https://raw.githubusercontent.com/statsbomb/open-data/master/data"

    # Download both files before touching the database, and fail loudly on
    # 404/500 instead of storing the error page as match data.
    events_data = requests.get(f"{base_url}/events/{match_id}.json")
    events_data.raise_for_status()
    lineup_data = requests.get(f"{base_url}/lineups/{match_id}.json")
    lineup_data.raise_for_status()

    # The connection context manager commits both rows together, or rolls
    # back if an insert fails, so a match is never stored half-complete.
    with con:
        con.executemany(
            "INSERT INTO files values(?, ?, ?)",
            [
                (match_id, 'events', events_data.content),
                (match_id, 'lineup', lineup_data.content),
            ],
        )
Next, load the data from two matches into the database.
In [46]:
Copied!
# Download and store the events and lineup files for two open-data matches
load_into_table(9636)
load_into_table(9609)
# Download and store the events and lineup files for two open-data matches
load_into_table(9636)
load_into_table(9609)
Define our database adapter. The adapter supports all URLs starting with db://
In [60]:
Copied!
from typing import BinaryIO
from kloppy.infra.io.adapters import Adapter, adapters
class DBAdapter(Adapter):
    """Adapter that serves input files from the sqlite ``files`` table.

    Handles URLs of the form ``db://<match_id>/<file_type>``, for example
    ``db://9636/events``.
    """

    def __init__(self, con):
        """Keep the sqlite3 connection that all lookups will use."""
        self.con = con

    def supports(self, url: str) -> bool:
        """Return True when ``url`` uses the ``db://`` scheme."""
        return url.startswith("db://")

    def read_to_stream(self, url: str, output: BinaryIO):
        """Write the stored content for ``url`` to the binary stream ``output``.

        Raises:
            ValueError: if the part after ``db://`` is not exactly
                ``<match_id>/<file_type>``.
            FileNotFoundError: if the table has no row for that match and
                file type.
        """
        match_id, file_type = url[len("db://"):].split("/")

        # Query through the connection given to this instance rather than a
        # module-level `con`, so the adapter works outside this notebook.
        cursor = self.con.cursor()
        cursor.execute(
            "SELECT data FROM files WHERE match_id = ? AND file_type = ?",
            (match_id, file_type),
        )
        result = cursor.fetchone()
        if result is None:
            raise FileNotFoundError(f"No data stored in database for {url}")

        data = result[0]
        # The column is declared TEXT: rows inserted as str come back as str,
        # which a binary stream cannot accept without encoding.
        if isinstance(data, str):
            data = data.encode("utf-8")
        output.write(data)
# Build the adapter around the sqlite connection opened earlier
db_adapter = DBAdapter(con)
# When you rerun this code you might need to run: adapters.clear()
# Add the adapter to kloppy's adapter list so db:// urls can be resolved
adapters.append(db_adapter)
# NOTE(review): presumably disables kloppy's cache so every load goes through the adapter; confirm
set_config('cache', None)
from typing import BinaryIO
from kloppy.infra.io.adapters import Adapter, adapters
class DBAdapter(Adapter):
    """Adapter that serves input files from the sqlite ``files`` table.

    Handles URLs of the form ``db://<match_id>/<file_type>``, for example
    ``db://9636/events``.
    """

    def __init__(self, con):
        """Keep the sqlite3 connection that all lookups will use."""
        self.con = con

    def supports(self, url: str) -> bool:
        """Return True when ``url`` uses the ``db://`` scheme."""
        return url.startswith("db://")

    def read_to_stream(self, url: str, output: BinaryIO):
        """Write the stored content for ``url`` to the binary stream ``output``.

        Raises:
            ValueError: if the part after ``db://`` is not exactly
                ``<match_id>/<file_type>``.
            FileNotFoundError: if the table has no row for that match and
                file type.
        """
        match_id, file_type = url[len("db://"):].split("/")

        # Query through the connection given to this instance rather than a
        # module-level `con`, so the adapter works outside this notebook.
        cursor = self.con.cursor()
        cursor.execute(
            "SELECT data FROM files WHERE match_id = ? AND file_type = ?",
            (match_id, file_type),
        )
        result = cursor.fetchone()
        if result is None:
            raise FileNotFoundError(f"No data stored in database for {url}")

        data = result[0]
        # The column is declared TEXT: rows inserted as str come back as str,
        # which a binary stream cannot accept without encoding.
        if isinstance(data, str):
            data = data.encode("utf-8")
        output.write(data)
# Build the adapter around the sqlite connection opened earlier
db_adapter = DBAdapter(con)
# When you rerun this code you might need to run: adapters.clear()
# Add the adapter to kloppy's adapter list so db:// urls can be resolved
adapters.append(db_adapter)
# NOTE(review): presumably disables kloppy's cache so every load goes through the adapter; confirm
set_config('cache', None)
Use the adapter!
In [61]:
Copied!
# Load match 9636 through the custom adapter: both inputs are db://<match_id>/<file_type> urls
dataset = statsbomb.load(
event_data="db://9636/events",
lineup_data="db://9636/lineup",
)
# Load match 9636 through the custom adapter: both inputs are db://<match_id>/<file_type> urls
dataset = statsbomb.load(
event_data="db://9636/events",
lineup_data="db://9636/lineup",
)
In [63]:
Copied!
# metadata.teams is unpacked into exactly two entries, named home and away here
home_team, away_team = dataset.metadata.teams
# Show which match was loaded from the database
print(f"Loaded from database: {home_team} - {away_team}")
# metadata.teams is unpacked into exactly two entries, named home and away here
home_team, away_team = dataset.metadata.teams
# Show which match was loaded from the database
print(f"Loaded from database: {home_team} - {away_team}")
Loaded from database: Barcelona - Las Palmas