Source code for torch_frame.datasets.bank_marketing

import os.path as osp
import zipfile

import pandas as pd

import torch_frame


class BankMarketing(torch_frame.data.Dataset):
    r"""The `Bank Marketing
    <https://github.com/LeoGrin/tabular-benchmark>`_ dataset.
    It is related to direct marketing campaigns of a Portuguese banking
    institution. The marketing campaigns were based on phone calls.
    Often, more than one contact to the same client was required in order
    to assess whether the product (bank term deposit) would be subscribed
    or not. The classification goal is to predict if the client will
    subscribe to a term deposit.

    **STATS:**

    .. list-table::
        :widths: 10 10 10 10 20 10
        :header-rows: 1

        * - #rows
          - #cols (numerical)
          - #cols (categorical)
          - #classes
          - Task
          - Missing value ratio
        * - 45,211
          - 7
          - 9
          - 2
          - binary_classification
          - 0.0%

    Args:
        root (str): Location handed to :meth:`download_url` for the
            downloaded archive. Everything that is extracted is written
            next to the downloaded archive file.
    """

    url = 'https://archive.ics.uci.edu/static/public/222/bank+marketing.zip'  # noqa

    def __init__(self, root: str) -> None:
        path = self.download_url(self.url, root)
        folder_path = osp.dirname(path)

        # The downloaded archive is unpacked first; the code then expects a
        # nested `bank.zip` to be present among the extracted files.
        with zipfile.ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(folder_path)

        # The nested archive is unpacked into its own `bank` sub-folder,
        # from which `bank-full.csv` is read.
        data_path = osp.join(folder_path, 'bank.zip')
        data_subfolder_path = osp.join(folder_path, 'bank')
        with zipfile.ZipFile(data_path, 'r') as zip_ref:
            zip_ref.extractall(data_subfolder_path)

        # The CSV uses ';' as its field separator.
        df = pd.read_csv(
            osp.join(data_subfolder_path, 'bank-full.csv'),
            sep=';',
        )

        # Semantic type of every column, including the target column `y`.
        col_to_stype = {
            'age': torch_frame.numerical,
            'job': torch_frame.categorical,
            'marital': torch_frame.categorical,
            'education': torch_frame.categorical,
            'default': torch_frame.categorical,
            'balance': torch_frame.numerical,
            'housing': torch_frame.categorical,
            'loan': torch_frame.categorical,
            'contact': torch_frame.categorical,
            'day': torch_frame.numerical,
            'month': torch_frame.categorical,
            'duration': torch_frame.numerical,
            'campaign': torch_frame.numerical,
            'pdays': torch_frame.numerical,
            'previous': torch_frame.numerical,
            'poutcome': torch_frame.categorical,
            'y': torch_frame.categorical,
        }

        super().__init__(df, col_to_stype, target_col='y')