Source code for torch_frame.datasets.poker_hand

import os.path as osp
import zipfile

import pandas as pd

import torch_frame


[docs]class PokerHand(torch_frame.data.Dataset): r"""The `Poker Hand <https://archive.ics.uci.edu/dataset/158/poker+hand>`_ dataset. It's a task to predict 5-card poker hand. **STATS:** .. list-table:: :widths: 10 10 10 10 20 10 :header-rows: 1 * - #rows - #cols (numerical) - #cols (categorical) - #classes - Task - Missing value ratio * - 1,025,010 - 5 - 5 - 10 - multiclass_classification - 0.0% """ url = 'https://archive.ics.uci.edu/static/public/158/poker+hand.zip' def __init__(self, root: str): path = self.download_url(self.url, root) folder_path = osp.dirname(path) with zipfile.ZipFile(path, 'r') as zip_ref: zip_ref.extractall(folder_path) train_path = osp.join(folder_path, 'poker-hand-training-true.data') test_path = osp.join(folder_path, 'poker-hand-testing.data') names = [ 'Suit of card #1', 'Rank of card #1', 'Suit of card #2', 'Rank of card #2', 'Suit of card #3', 'Rank of card #3', 'Suit of card #4', 'Rank of card #4', 'Suit of card #5', 'Rank of card #5', 'Poker Hand', ] train_df = pd.read_csv(train_path, names=names) test_df = pd.read_csv(test_path, names=names) df = pd.concat([train_df, test_df], ignore_index=True) col_to_stype = { 'Suit of card #1': torch_frame.categorical, 'Rank of card #1': torch_frame.numerical, 'Suit of card #2': torch_frame.categorical, 'Rank of card #2': torch_frame.numerical, 'Suit of card #3': torch_frame.categorical, 'Rank of card #3': torch_frame.numerical, 'Suit of card #4': torch_frame.categorical, 'Rank of card #4': torch_frame.numerical, 'Suit of card #5': torch_frame.categorical, 'Rank of card #5': torch_frame.numerical, 'Poker Hand': torch_frame.categorical, } super().__init__(df, col_to_stype, target_col='Poker Hand')