Source code for torch_frame.datasets.dota2

import os.path as osp
import zipfile

import pandas as pd

import torch_frame


[docs]class Dota2(torch_frame.data.Dataset):
    r"""The `Dota2 Game Results
    <https://archive.ics.uci.edu/dataset/367/dota2+games+results>`_
    dataset. Dota2 is a popular moba game with two teams of 5 players.
    At start of the game, each player choose a unique hero with
    different strengths and weakness. The dataset is reasonably sparse
    as only 10 of 113 possible heroes are chosen in a given game. All
    games were played in a space of 2 hours on the 13th of August 2016.
    The classification goal is to predict the winning team.

    **STATS:**

    .. list-table::
        :widths: 10 10 10 10 20 10
        :header-rows: 1

        * - #rows
          - #cols (numerical)
          - #cols (categorical)
          - #classes
          - Task
          - Missing value ratio
        * - 92,650
          - 0
          - 116
          - 2
          - binary_classification
          - 0.0%
    """

    url = 'https://archive.ics.uci.edu/static/public/367/dota2+games+results.zip'  # noqa

    def __init__(self, root: str):
        path = self.download_url(self.url, root)
        names = [
            'Team won the game',
            'Cluster ID',
            'Game mode',
            'Game type',
        ]
        num_heroes = 113
        names += [f'hero_{i}' for i in range(num_heroes)]
        folder_path = osp.dirname(path)
        with zipfile.ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(folder_path)

        df = pd.read_csv(osp.join(folder_path, 'dota2Train.csv'), names=names)

        col_to_stype = {
            'Team won the game': torch_frame.categorical,
            'Cluster ID': torch_frame.categorical,
            'Game mode': torch_frame.categorical,
            'Game type': torch_frame.categorical,
        }
        for i in range(num_heroes):
            col_to_stype[f'hero_{i}'] = torch_frame.categorical

        super().__init__(df, col_to_stype, target_col='Team won the game')