Skip to content

Commit ae8ced2

Browse files
Added UCI datasets 21 to 40 (#104)
* Added datasets 21 to 40 * [github-action] formatting fixes Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 22d1126 commit ae8ced2

22 files changed

+712
-0
lines changed

docs/datasets.rst

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,26 @@ The Torchhd library provides many popular built-in datasets to work with.
3939
Cardiotocography3Clases
4040
Cardiotocography10Clases
4141
ChessKrvk
42+
ChessKrvkp
43+
CongressionalVoting
44+
ConnBenchSonarMinesRocks
45+
ConnBenchVowelDeterding
46+
Connect4
47+
Contrac
48+
CreditApproval
49+
CylinderBands
50+
Dermatology
51+
Echocardiogram
52+
Ecoli
53+
EnergyY1
54+
EnergyY2
55+
Fertility
56+
Flags
57+
Glass
58+
HabermanSurvival
59+
HayesRoth
60+
HeartCleveland
61+
HeartHungarian
4262

4363
Base classes
4464
------------------------

torchhd/datasets/__init__.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,26 @@
2929
from torchhd.datasets.cardiotocography_3clases import Cardiotocography3Clases
3030
from torchhd.datasets.cardiotocography_10clases import Cardiotocography10Clases
3131
from torchhd.datasets.chess_krvk import ChessKrvk
32+
from torchhd.datasets.chess_krvkp import ChessKrvkp
33+
from torchhd.datasets.congressional_voting import CongressionalVoting
34+
from torchhd.datasets.conn_bench_sonar_mines_rocks import ConnBenchSonarMinesRocks
35+
from torchhd.datasets.conn_bench_vowel_deterding import ConnBenchVowelDeterding
36+
from torchhd.datasets.connect_4 import Connect4
37+
from torchhd.datasets.contrac import Contrac
38+
from torchhd.datasets.credit_approval import CreditApproval
39+
from torchhd.datasets.cylinder_bands import CylinderBands
40+
from torchhd.datasets.dermatology import Dermatology
41+
from torchhd.datasets.echocardiogram import Echocardiogram
42+
from torchhd.datasets.ecoli import Ecoli
43+
from torchhd.datasets.energy_y1 import EnergyY1
44+
from torchhd.datasets.energy_y2 import EnergyY2
45+
from torchhd.datasets.fertility import Fertility
46+
from torchhd.datasets.flags import Flags
47+
from torchhd.datasets.glass import Glass
48+
from torchhd.datasets.haberman_survival import HabermanSurvival
49+
from torchhd.datasets.hayes_roth import HayesRoth
50+
from torchhd.datasets.heart_cleveland import HeartCleveland
51+
from torchhd.datasets.heart_hungarian import HeartHungarian
3252

3353

3454
__all__ = [
@@ -63,4 +83,24 @@
6383
"Cardiotocography3Clases",
6484
"Cardiotocography10Clases",
6585
"ChessKrvk",
86+
"ChessKrvkp",
87+
"CongressionalVoting",
88+
"ConnBenchSonarMinesRocks",
89+
"ConnBenchVowelDeterding",
90+
"Connect4",
91+
"Contrac",
92+
"CreditApproval",
93+
"CylinderBands",
94+
"Dermatology",
95+
"Echocardiogram",
96+
"Ecoli",
97+
"EnergyY1",
98+
"EnergyY2",
99+
"Fertility",
100+
"Flags",
101+
"Glass",
102+
"HabermanSurvival",
103+
"HayesRoth",
104+
"HeartCleveland",
105+
"HeartHungarian",
66106
]

torchhd/datasets/chess_krvkp.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class ChessKrvkp(DatasetFourFold):
6+
"""`Chess (King-Rook vs. King-Pawn) <https://archive.ics.uci.edu/ml/datasets/Chess+(King-Rook+vs.+King-Pawn)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the train dataset; if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify, which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "chess-krvkp"
28+
classes: List[str] = [
29+
"White can win",
30+
"White cannot win",
31+
]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class CongressionalVoting(DatasetFourFold):
6+
"""`Congressional Voting Records <https://archive.ics.uci.edu/ml/datasets/congressional+voting+records>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the train dataset; if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify, which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "congressional-voting"
28+
classes: List[str] = [
29+
"Democrat",
30+
"Republican",
31+
]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class ConnBenchSonarMinesRocks(DatasetFourFold):
6+
"""`Connectionist Bench (Sonar, Mines vs. Rocks) <https://archive.ics.uci.edu/ml/datasets/connectionist+bench+(sonar,+mines+vs.+rocks)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the train dataset; if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify, which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "conn-bench-sonar-mines-rocks"
28+
classes: List[str] = [
29+
"Mine",
30+
"Rock",
31+
]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetTrainTest
3+
4+
5+
class ConnBenchVowelDeterding(DatasetTrainTest):
6+
"""`Connectionist Bench (Vowel Recognition - Deterding Data) <https://archive.ics.uci.edu/ml/datasets/Connectionist+Bench+(Vowel+Recognition+-+Deterding+Data)>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by hyper_search variable.
11+
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the train dataset; if not (``hyper_search = False``), returns the test set.
12+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
13+
while the second row corresponds to test indices (used if ``train = False``).
14+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
15+
and returns a transformed version.
16+
target_transform (callable, optional): A function/transform that takes in the
17+
target and transforms it.
18+
download (bool, optional): If True, downloads the dataset from the internet and
19+
puts it in root directory. If dataset is already downloaded, it is not
20+
downloaded again.
21+
"""
22+
23+
name = "conn-bench-vowel-deterding"
24+
classes: List[str] = [
25+
"0",
26+
"1",
27+
"2",
28+
"3",
29+
"4",
30+
"5",
31+
"6",
32+
"7",
33+
"8",
34+
"9",
35+
"10",
36+
]

torchhd/datasets/connect_4.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Connect4(DatasetFourFold):
6+
"""`Connect-4 <https://archive.ics.uci.edu/ml/datasets/connect-4>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the train dataset; if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify, which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "connect-4"
28+
classes: List[str] = [
29+
"draw",
30+
"loss",
31+
"win",
32+
]

torchhd/datasets/contrac.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class Contrac(DatasetFourFold):
6+
"""`Contraceptive Method Choice <https://archive.ics.uci.edu/ml/datasets/Contraceptive+Method+Choice>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the train dataset; if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify, which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "contrac"
28+
classes: List[str] = [
29+
"No-use",
30+
"Long-term",
31+
"Short-term",
32+
]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class CreditApproval(DatasetFourFold):
6+
"""`Credit Approval <https://archive.ics.uci.edu/ml/datasets/credit+approval>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the train dataset; if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify, which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "credit-approval"
28+
classes: List[str] = [
29+
"+",
30+
"-",
31+
]

torchhd/datasets/cylinder_bands.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import List
2+
from torchhd.datasets import DatasetFourFold
3+
4+
5+
class CylinderBands(DatasetFourFold):
6+
"""`Cylinder Bands <https://archive.ics.uci.edu/ml/datasets/Cylinder+Bands>`_ dataset.
7+
8+
Args:
9+
root (string): Root directory containing the files of the dataset.
10+
train (bool, optional): If True, returns training (sub)set from the file storing training data as further determined by fold and hyper_search variables.
11+
Otherwise, if a hyperparameter search is performed (``hyper_search = True``), returns a subset of the train dataset; if not (``hyper_search = False``), returns a subset of the training dataset
12+
as specified in ``conxuntos_kfold.dat`` if fold number is correct. Otherwise issues an error.
13+
fold (int, optional): Specifies which fold number to use. The default value of -1 returns all the training data from the corresponding file.
14+
Values between 0 and 3 specify, which fold in ``conxuntos_kfold.dat`` to use. Relevant only if hyper_search is set to False and ``0 <= fold <= 3``.
15+
Indices in even rows (zero indexing) of ``conxuntos_kfold.dat`` correspond to train subsets while indices in odd rows correspond to test subsets.
16+
hyper_search (bool, optional): If True, creates dataset using indices in ``conxuntos.dat``. This split is used for hyperparameter search. The first row corresponds to train indices (used if ``train = True``)
17+
while the second row corresponds to test indices (used if ``train = False``).
18+
transform (callable, optional): A function/transform that takes in a torch.FloatTensor
19+
and returns a transformed version.
20+
target_transform (callable, optional): A function/transform that takes in the
21+
target and transforms it.
22+
download (bool, optional): If True, downloads the dataset from the internet and
23+
puts it in root directory. If dataset is already downloaded, it is not
24+
downloaded again.
25+
"""
26+
27+
name = "cylinder-bands"
28+
classes: List[str] = [
29+
"band",
30+
"noband",
31+
]

0 commit comments

Comments
 (0)