-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
147 lines (119 loc) · 5.55 KB
/
run.py
File metadata and controls
147 lines (119 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import argparse
import sys
from CW1.src.main import recreate_results, build_and_evaluate_decision_tree, cross_validate_decision_tree
def print_help():
    """Print the detailed command-line usage guide to standard output.

    The guide lists the available commands, the options each command
    reads, and a few example invocations. The defaults shown here are
    meant to mirror the argparse defaults declared in ``main``; in
    particular ``--data_path`` has a single parser-level default
    (``wifi_db/clean_dataset.txt``) that applies to every command.

    Returns:
        None. The text is written with ``print``.
    """
    # The text is kept flush-left on purpose: any indentation inside the
    # triple-quoted literal would become part of the printed output.
    help_text = """
DECISION TREE ANALYSIS - COMMAND LINE INTERFACE
===============================================
USAGE:
python run.py [COMMAND] [OPTIONS]
COMMANDS:
(no command) Run complete analysis pipeline (recreate_results)
build_tree Build and evaluate a single decision tree
cross_validate Perform k-fold cross-validation
--help, -h Show this help message
OPTIONS FOR build_tree:
--data_path PATH Path to dataset (default: wifi_db/clean_dataset.txt)
--train_split FLOAT Training split ratio (default: 0.8)
--random_seed INT Random seed for reproducibility (default: -1)
OPTIONS FOR cross_validate:
--k INT Number of folds (default: 10)
--data_path PATH Path to dataset (default: wifi_db/clean_dataset.txt)
--random_seed INT Random seed for reproducibility (default: -1)
OPTIONS FOR recreate_results (default):
--random_seed INT Random seed for reproducibility (default: -1)
EXAMPLES:
python3 run.py
python3 run.py --random_seed 42
python3 run.py build_tree --data_path wifi_db/noisy_dataset.txt
python3 run.py build_tree --train_split 0.7 --random_seed 123
python3 run.py cross_validate --k 5 --data_path wifi_db/noisy_dataset.txt
===============================================
"""
    print(help_text)
def _fail(message, suggestion):
    """Print a validation error and its suggestion to stderr; return status 1."""
    print(message, file=sys.stderr)
    print(suggestion, file=sys.stderr)
    return 1


def _check_data_path(data_path):
    """Return 0 when *data_path* ends with '.txt'; otherwise report it and return 1."""
    if data_path.endswith('.txt'):
        return 0
    return _fail(
        "β Error: --data_path must point to a .txt file",
        "π‘ Suggestion: Use a valid dataset path like wifi_db/clean_dataset.txt",
    )


def _run_build_tree(args):
    """Validate the build_tree options, then build and evaluate one tree."""
    if not (0.1 <= args.train_split <= 0.9):
        return _fail(
            "β Error: --train_split must be between 0.1 and 0.9",
            "π‘ Suggestion: Use --train_split 0.8 for 80% training data",
        )
    if _check_data_path(args.data_path):
        return 1

    print("π³ Building and evaluating decision tree...")
    print(f"π Data path: {args.data_path}")
    print(f"π Train split: {args.train_split}")
    print(f"βοΈ Random seed: {args.random_seed}")
    build_and_evaluate_decision_tree(
        data_path=args.data_path,
        # The callee names this keyword train_test_split, not train_split.
        train_test_split=args.train_split,
        # Pruning is not exposed on the command line, so it is always off.
        prune=False,
        random_seed=args.random_seed,
    )
    return 0


def _run_cross_validate(args):
    """Validate the cross_validate options, then run k-fold cross-validation."""
    if args.k < 2:
        return _fail(
            "β Error: --k must be at least 2 for cross-validation",
            "π‘ Suggestion: Use --k 10 for 10-fold cross-validation",
        )
    # Check the path before prompting so the user is never asked to confirm
    # a run that would be rejected anyway.
    if _check_data_path(args.data_path):
        return 1
    if args.k > 20:
        print("β οΈ Warning: Large k values (>20) may be computationally expensive")
        # input() raises EOFError without an interactive stdin; that is left
        # to propagate to the caller's error handling.
        response = input("Continue? (y/n): ")
        if response.strip().lower() not in ('y', 'yes'):
            return 0

    print("π Performing k-fold cross-validation...")
    print(f"π’ K-folds: {args.k}")
    print(f"π Data path: {args.data_path}")
    print(f"βοΈ Random seed: {args.random_seed}")
    cross_validate_decision_tree(
        k=args.k,
        data_path=args.data_path,
        random_seed=args.random_seed,
    )
    return 0


def main(argv=None):
    """Parse command-line arguments and dispatch to the selected analysis.

    With no command the complete pipeline (``recreate_results``) is run;
    ``build_tree`` and ``cross_validate`` run the corresponding function
    after validating their options. ``--help``/``-h`` prints the custom
    usage guide instead of argparse's generated one.

    NOTE(review): the glyphs that prefix the status messages look like
    emoji decoded with the wrong charset; they are reproduced unchanged
    here. Confirm the file is stored as UTF-8.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Returns:
        int: ``0`` when help was shown, the requested analysis ran, or the
        user declined the large-k confirmation; ``1`` when an option failed
        validation (the error is printed to stderr).

    Raises:
        SystemExit: raised by argparse itself for unparseable arguments.
    """
    # add_help=False so that --help/-h can be routed to print_help().
    parser = argparse.ArgumentParser(
        description='Decision Tree Analysis Tool', add_help=False)
    parser.add_argument('--help', '-h', action='store_true',
                        help='Show help message')
    parser.add_argument('command', nargs='?',
                        choices=['build_tree', 'cross_validate'],
                        help='Command to execute')
    # Shared by every command.
    parser.add_argument('--random_seed', type=int, default=-1,
                        help='Random seed for reproducibility (default: -1)')
    # Read by build_tree and cross_validate.
    parser.add_argument('--data_path', type=str,
                        default='wifi_db/clean_dataset.txt',
                        help='Path to dataset (default: wifi_db/clean_dataset.txt)')
    # Read by build_tree only.
    parser.add_argument('--train_split', type=float, default=0.8,
                        help='Training split ratio (default: 0.8)')
    # Read by cross_validate only.
    parser.add_argument('--k', type=int, default=10,
                        help='Number of folds for cross-validation (default: 10)')
    args = parser.parse_args(argv)

    if args.help:
        print_help()
        return 0
    if args.command == 'build_tree':
        return _run_build_tree(args)
    if args.command == 'cross_validate':
        return _run_cross_validate(args)

    # No command given: run the complete pipeline.
    print("π Running complete analysis pipeline...")
    print(f"βοΈ Random seed: {args.random_seed}")
    recreate_results(random_seed=args.random_seed)
    return 0
if __name__ == "__main__":
    # Script entry point. This is the top-level boundary, so a broad
    # ``except Exception`` is used to turn any failure into a short
    # message plus a non-zero exit status instead of a traceback.
    try:
        status = main()
    except KeyboardInterrupt:
        print("\nβ οΈ Operation cancelled by user", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"β Error: {e}", file=sys.stderr)
        print("π‘ Use 'python run.py --help' for usage information",
              file=sys.stderr)
        sys.exit(1)
    # Forward main()'s result as the process exit status; a result of
    # None is treated by sys.exit as success (status 0).
    sys.exit(status)