-
Notifications
You must be signed in to change notification settings - Fork 0
/
PredictCarbonMonoxideLevels.py
136 lines (112 loc) · 4.97 KB
/
PredictCarbonMonoxideLevels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# imported libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, train_test_split
# global variables
# Absolute path to the Air Quality Excel workbook. It is hard-coded to one
# machine's Documents folder, so it must be edited before running elsewhere.
filePath = "C:/Users/acer/Documents/Platform Technologies Grouping Case Study/AirQualityUCI.xlsx"
# Read the workbook once, at import time; the functions below share this frame.
dataSet = pd.read_excel(filePath)
# Descriptive statistics produced by DataFrame.describe(); plotted by
# visualizeTheDataset().
summary = dataSet.describe()
# load the dataset
def loadTheDataset():
    """Print the module-level ``dataSet`` DataFrame to standard output."""
    print(dataSet)
# visualize the dataset
def visualizeTheDataset():
    """Plot the mean and standard deviation of every summarized column.

    Reads the module-level ``summary`` table (the result of
    ``dataSet.describe()``) and draws two overlaid seaborn bar plots on a
    single figure: the per-column mean in blue and the per-column standard
    deviation in orange. The figure is shown with ``plt.show()``.

    Returns:
        None.
    """
    # Transpose so each row is a dataset column and each column a statistic.
    stats_by_column = summary.T
    # Exclude 'Date' by label instead of slicing off the first row: whether
    # describe() reports a datetime column depends on the pandas version, so
    # a positional [1:] slice could silently drop a real measurement column.
    stats_by_column = stats_by_column.drop(index='Date', errors='ignore')

    plt.figure(figsize=(12, 6))
    sns.barplot(x=stats_by_column.index, y=stats_by_column['mean'],
                color='blue', alpha=0.7, label='Mean')
    sns.barplot(x=stats_by_column.index, y=stats_by_column['std'],
                color='orange', alpha=0.7, label='Std Dev')
    plt.title('Air Quality Visualization Data Set')
    plt.xticks(rotation=45)
    plt.legend()
    plt.show()
def evaluate_model(model, X_val, y_val, X_test, y_test):
    """Plot and print how an already-fitted model does on two held-out sets.

    Args:
        model: Estimator exposing ``predict``; its class name labels the output.
        X_val: Validation features.
        y_val: Actual target values for the validation set.
        X_test: Test features.
        y_test: Actual target values for the test set.

    Shows one figure of predicted versus actual values for both sets, then
    prints the mean squared error and R-squared (two decimals each) for the
    validation set followed by the test set.

    Returns:
        None.
    """
    model_name = model.__class__.__name__
    val_pred = model.predict(X_val)
    test_pred = model.predict(X_test)

    # Both series share the same marker/line styling; only colour and label differ.
    series_style = {'alpha': 0.8, 'linewidth': 2, 'markersize': 8}
    plt.figure(figsize=(12, 8))
    plt.plot(y_val, val_pred, 'o-', color='red', label='Validation Set', **series_style)
    plt.plot(y_test, test_pred, 'o-', color='blue', label='Test Set', **series_style)
    plt.title(f'Model Evaluation: {model_name}', fontsize=16)
    plt.xlabel('Actual Carbon Monoxide Concentrations', fontsize=14)
    plt.ylabel('Predicted Carbon Monoxide Concentrations', fontsize=14)
    plt.legend(fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()

    # Report the same two metrics for each set, validation first.
    for set_label, actual, predicted in (
        ('Validation Set', y_val, val_pred),
        ('New Data Set', y_test, test_pred),
    ):
        mse = mean_squared_error(actual, predicted)
        r2 = r2_score(actual, predicted)
        print(f"\nModel Evaluation: {model_name} on {set_label}")
        print(f'Mean Squared Error: {mse:.2f}')
        print(f'R-squared: {r2:.2f}')
# predict carbon monoxide levels
def predictCarbonMonoxideLevels():
    """Spot-check five regressors at predicting the 'CO(GT)' column.

    Uses the module-level ``dataSet``: 'CO(GT)' is the target and every other
    column except 'Date' and 'Time' is a feature. The rows are split 70/15/15
    into training, validation and test sets (``random_state=42``). Each model
    is fitted on the training set, passed to ``evaluate_model`` with the
    validation and test sets, and then 5-fold cross-validated on the full
    feature/target data, with the mean MSE printed.

    Returns:
        None.
    """
    target = dataSet['CO(GT)']
    features = dataSet.drop(columns=['CO(GT)', 'Date', 'Time'])

    # First hold out 30% of the rows, then halve that hold-out into the
    # validation and test sets.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        features, target, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=42)

    # Candidate algorithms, all constructed with default hyper-parameters.
    candidate_classes = (
        LinearRegression,
        DecisionTreeRegressor,
        RandomForestRegressor,
        GradientBoostingRegressor,
        SVR,
    )
    candidates = [estimator_cls() for estimator_cls in candidate_classes]

    for model in candidates:
        model.fit(X_train, y_train)
        evaluate_model(model, X_val, y_val, X_test, y_test)

        # The scorer returns negated MSE values; abs() turns them back into
        # plain MSE before averaging across the five folds.
        fold_scores = cross_val_score(
            model, features, target, cv=5, scoring='neg_mean_squared_error')
        mean_cross_val_mse = np.mean(np.abs(fold_scores))
        print(f'Mean Cross-Validated MSE for {model.__class__.__name__}: {mean_cross_val_mse}\n')
# main menu
def menu():
    """Run the interactive text menu until the user chooses to exit.

    Each pass prints the menu, reads a choice from standard input and
    dispatches it:

    * 1 -> ``loadTheDataset()``
    * 2 -> ``visualizeTheDataset()``
    * 3 -> ``predictCarbonMonoxideLevels()``
    * 4 -> print "Program Terminated" and return

    Non-integer input and integers outside 1-4 print a message and show the
    menu again. Exceptions raised by the selected action are not caught here,
    so they are no longer misreported as invalid input.

    Returns:
        None, once the user selects option 4.
    """
    while True:
        print("Air Quality Model")
        print("Menu")
        print("[1] Load the dataset")
        print("[2] Visualize the dataset")
        print("[3] Predict Carbon Monoxide Levels Using Different Models")
        print("[4] Exit")

        # Only the parse is guarded: a non-numeric entry is reported and the
        # menu is shown again instead of crashing with ValueError.
        try:
            choice = int(input("Enter your choice: "))
        except ValueError:
            print("Invalid input, please input a valid integer!")
            continue

        # The enclosing loop re-displays the menu after each action, so no
        # recursive menu() call is needed (recursion grew the stack forever).
        if choice == 1:
            loadTheDataset()
        elif choice == 2:
            visualizeTheDataset()
        elif choice == 3:
            predictCarbonMonoxideLevels()
        elif choice == 4:
            print("Program Terminated")
            # Actually leave the loop; re-entering the menu here made it
            # impossible to exit the program.
            return
        else:
            print("Invalid input, please input 1 and 4.")
# Start the interactive menu only when this file is executed as a script.
if __name__ == "__main__":
    menu()