Source code for torch_frame.datasets.dota2

import os.path as osp
import zipfile

import pandas as pd

import torch_frame


class Dota2(torch_frame.data.Dataset):
    r"""The `Dota2 Game Results
    <https://archive.ics.uci.edu/dataset/367/dota2+games+results>`_
    dataset.

    Dota2 is a popular MOBA game played by two teams of 5 players. At the
    start of a game, each player chooses a unique hero with different
    strengths and weaknesses. The dataset is reasonably sparse, as only 10
    of the 113 possible heroes are chosen in a given game. All games were
    played within a span of 2 hours on the 13th of August 2016. The
    classification goal is to predict the winning team.

    Args:
        root (str): Forwarded to :meth:`download_url` together with
            :attr:`url`; presumably the directory the archive is
            downloaded into (confirm against the base class).

    **STATS:**

    .. list-table::
        :widths: 10 10 10 10 20 10
        :header-rows: 1

        * - #rows
          - #cols (numerical)
          - #cols (categorical)
          - #classes
          - Task
          - Missing value ratio
        * - 92,650
          - 0
          - 116
          - 2
          - binary_classification
          - 0.0%
    """

    url = 'https://archive.ics.uci.edu/static/public/367/dota2+games+results.zip'  # noqa

    def __init__(self, root: str):
        # Fetch the zip archive and unpack it next to where it was saved.
        archive_path = self.download_url(self.url, root)
        extract_dir = osp.dirname(archive_path)
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall(extract_dir)

        # The CSV is read with explicit column names: the target, three
        # game-level columns, then one column per hero.
        target_col = 'Team won the game'
        hero_count = 113
        columns = [
            target_col,
            'Cluster ID',
            'Game mode',
            'Game type',
            *(f'hero_{i}' for i in range(hero_count)),
        ]

        # Only the training split of the archive is loaded.
        csv_path = osp.join(extract_dir, 'dota2Train.csv')
        df = pd.read_csv(csv_path, names=columns)

        # Every column, including the target, is treated as categorical.
        col_to_stype = dict.fromkeys(columns, torch_frame.categorical)

        super().__init__(df, col_to_stype, target_col=target_col)