-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathml_predict.py
More file actions
65 lines (46 loc) · 1.88 KB
/
ml_predict.py
File metadata and controls
65 lines (46 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from hdfs import InsecureClient
from pyspark.ml.regression import RandomForestRegressionModel
from pyspark.sql import SparkSession
from DistributedML.ml_trainer import preprocess_data, load_data, evaluate_model
def choose_model(path):
    """Interactively pick a saved model from an HDFS directory and load it.

    The entries under ``path`` are listed through the WebHDFS client, shown
    as a numbered menu on stdout, and the user is prompted on stdin until a
    number inside the menu range is entered. The selected entry is then
    loaded with ``RandomForestRegressionModel.load``.

    Args:
        path: HDFS directory holding the saved models. It is passed to the
            WebHDFS ``list`` call and also appended to the hard-coded
            ``hdfs://10.4.41.44:27000/user/bdm/`` prefix to build the URI
            the model is loaded from.

    Returns:
        The loaded ``RandomForestRegressionModel``.

    Raises:
        FileNotFoundError: If the directory listing is empty, so there is
            nothing to choose from.
    """
    # Connect to the HDFS cluster (WebHDFS endpoint).
    hdfs = InsecureClient(url='http://10.4.41.44:9870', user='bdm')

    # Keep only the last path component of every listed entry, so the menu
    # and the load URI work whether the listing yields names or full paths.
    model_names = [entry.split('/')[-1] for entry in hdfs.list(path)]

    # Without this guard no number can satisfy the range check below and
    # the prompt loop would never terminate.
    if not model_names:
        raise FileNotFoundError(f"No models found under HDFS path {path!r}")

    # Display the model names and let the user choose.
    print("Available models:")
    for number, model_name in enumerate(model_names, start=1):
        print(f"{number}. {model_name}")

    # Keep prompting until a number within the menu range is entered.
    while True:
        try:
            choice = int(input("Enter the number corresponding to the model you want to choose: "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if 1 <= choice <= len(model_names):
            break
        print("Invalid choice. Please enter a valid number.")

    # Build the fully qualified HDFS URI of the chosen model and load it.
    model_file_path = "hdfs://10.4.41.44:27000/user/bdm/" + path + '/' + model_names[choice - 1]
    return RandomForestRegressionModel.load(model_file_path)
def deploy_and_predict():
    """Load a user-selected model and evaluate it on the rent dataset.

    Steps, in order: obtain (or create) a SparkSession named
    ``ModelPrediction``, let the user choose a model from the ``models``
    directory via ``choose_model``, load the rent dataset CSV from HDFS with
    ``load_data``, run ``preprocess_data`` on it, and hand the model plus the
    second value returned by ``preprocess_data`` to ``evaluate_model``.

    Returns:
        None. Whatever ``evaluate_model`` returns is discarded.
    """
    # The session must exist before the model is loaded; the handle itself
    # is not used again in this function.
    session = SparkSession.builder.appName("ModelPrediction").getOrCreate()

    # Ask the user which of the stored models to use.
    chosen_model = choose_model("models")

    # Read and prepare the dataset.
    raw_frame = load_data("hdfs://10.4.41.44:27000/user/bdm/dataset/rentdataset.csv")
    # Only the second part of the split is needed here
    # (presumably the held-out test set; confirm against ml_trainer).
    _, held_out = preprocess_data(raw_frame)

    # Score the chosen model on that split.
    evaluate_model(chosen_model, held_out)