-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathFilterNode.cpp
More file actions
86 lines (63 loc) · 2.81 KB
/
FilterNode.cpp
File metadata and controls
86 lines (63 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include "FilterNode.hpp"
/// Constructs a filter node; the given name is passed on to the Node base class.
FilterNode::FilterNode(std::string name)
    : Node(name)
{
}
std::set<std::string> FilterNode::ColumnNames() {
return columnNames;
}
void FilterNode::SetColumnNames(std::set<std::string> colNames) {
columnNames = colNames;
}
void FilterNode::AddColumn(std::string colName) {
columnNames.insert(colName);
}
/**
 * @brief Removes a column name from the set of column names to filter out.
 *
 * Does nothing when colName is not in the set.
 *
 * @param colName Name of the column that should no longer be filtered.
 */
void FilterNode::RemoveColName(std::string colName) {
    // Keys in a std::set are unique, so a keyed erase removes at most one
    // element and is a no-op when the key is absent. It uses the ordered
    // lookup (O(log n)) instead of walking every element.
    columnNames.erase(colName);
}
/**
 * @brief Drops the columns listed in columnNames from the input data table
 *        and stores the result as this node's output data table.
 *
 * Works on copies of the input table's data matrix, column names and
 * categorical-values map. A logical column that has an entry in the
 * categorical-values map occupies as many consecutive matrix columns as that
 * entry has elements (its binarized columns); any other logical column
 * occupies exactly one matrix column.
 *
 * For every column name that is in columnNames, all of its matrix columns
 * are shed and its categorical-values entry (if any) is erased. Names in
 * columnNames that do not occur in the input table are ignored.
 */
void FilterNode::run() {
    auto matrix = inputDataTable->DataMatrix();
    const auto colNames = inputDataTable->ColumnNames();
    auto catCol = inputDataTable->CategoricalValues();

    // Names of the columns that survive the filter, in their original order.
    // Built incrementally so that its size is always exactly the number of
    // kept columns, even when columnNames holds names absent from the input.
    std::vector<std::string> filteredColNames;
    filteredColNames.reserve(colNames.size());

    // Index, in the progressively shrinking matrix, of the first matrix
    // column belonging to the logical column currently being visited.
    // Only kept columns advance it: shedding a column shifts every later
    // column one position to the left, so after a removal the next logical
    // column starts at the very same index.
    size_t matrixCol = 0;

    for (const auto& colName : colNames) {
        // Number of matrix columns this logical column spans.
        const auto catIt = catCol.find(colName);
        const bool isCategorical = (catIt != catCol.end());
        const size_t width = isCategorical ? catIt->second.size() : 1;

        if (columnNames.find(colName) != columnNames.end()) {
            // Filtered: remove all of its matrix columns. The index stays
            // fixed because each shed pulls the next column into its place.
            for (size_t shed = 0; shed < width; ++shed) {
                matrix.shed_col(matrixCol);
            }
            if (isCategorical) {
                catCol.erase(catIt);
            }
        } else {
            // Kept: remember the name and step over its matrix columns.
            filteredColNames.push_back(colName);
            matrixCol += width;
        }
    }

    // Make and set a new data table based on the input one, then publish it
    // as the output data table.
    DataTable dt(*inputDataTable);
    dt.SetDataMatrix(matrix);
    dt.SetColumnNames(filteredColNames);
    dt.SetCategoricalValues(catCol);
    this->setOutDataTable(dt);
}