Data Loading and Basic Information¶
Import required libraries, read the Churn.csv dataset, and display basic info to understand columns, data types, and missing values.
# Core analysis stack: pandas/numpy for data handling, matplotlib/seaborn for plots.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Colab-style absolute path to the Telco churn dataset — TODO confirm location.
data_path = '/content/Churn.csv'
data = pd.read_csv(data_path)
# Quick overview of columns, dtypes, and non-null counts.
# Note: TotalCharges loads as object (text), so blanks are not reported as nulls here.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7043 entries, 0 to 7042 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 customerID 7043 non-null object 1 gender 7043 non-null object 2 SeniorCitizen 7043 non-null int64 3 Partner 7043 non-null object 4 Dependents 7043 non-null object 5 tenure 7043 non-null int64 6 PhoneService 7043 non-null object 7 MultipleLines 7043 non-null object 8 InternetService 7043 non-null object 9 OnlineSecurity 7043 non-null object 10 OnlineBackup 7043 non-null object 11 DeviceProtection 7043 non-null object 12 TechSupport 7043 non-null object 13 StreamingTV 7043 non-null object 14 StreamingMovies 7043 non-null object 15 Contract 7043 non-null object 16 PaperlessBilling 7043 non-null object 17 PaymentMethod 7043 non-null object 18 MonthlyCharges 7043 non-null float64 19 TotalCharges 7043 non-null object 20 Churn 7043 non-null object dtypes: float64(1), int64(2), object(18) memory usage: 1.1+ MB
Data Cleaning and Feature Selection¶
Convert TotalCharges to numeric, drop missing values, and select key categorical and numerical features for analysis.
# Work on a copy so the raw frame stays untouched; coerce TotalCharges
# (stored as text, blanks -> NaN) and drop the rows that failed to parse.
data_v = data.copy()
data_v['TotalCharges'] = pd.to_numeric(data_v['TotalCharges'], errors='coerce')
data_v = data_v.dropna(subset=['TotalCharges'])

# Categorical / low-cardinality features examined in the first plot grid.
feature_1 = [
    'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService',
    'MultipleLines', 'InternetService', 'PaymentMethod', 'Contract',
    'PaperlessBilling',
]

# Overall churn class balance.
plt.figure(figsize=(6, 4))
sns.countplot(data=data_v, x='Churn')
plt.title('Churn')
plt.show()
# 5x2 grid of count plots: churn split for each feature in feature_1.
fig, axes = plt.subplots(5, 2, figsize=(20, 30))
axes = axes.flatten()
total = len(data_v)  # hoisted out of the loop: invariant across features
for i, feature in enumerate(feature_1):
    sns.countplot(data=data_v, x=feature, hue='Churn', ax=axes[i])
    axes[i].tick_params(axis='x', rotation=45)
    axes[i].legend(title='Churn', loc='upper right')
    axes[i].set_title(f'Churn with {feature}')
    # Annotate each bar with its share of the whole (cleaned) dataset.
    for p in axes[i].patches:
        height = p.get_height()
        axes[i].text(p.get_x() + p.get_width() / 2., height + 50,
                     f'{height/total:.1%}', ha='center')
plt.tight_layout()
plt.show()
Visualizing Key Features¶
Plot distributions of important categorical and numerical features to explore their relationship with customer churn.
# Add-on service features for the second plot grid.
feature_2 = ['OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
             'TechSupport', 'StreamingTV', 'StreamingMovies']
# 3x2 grid of count plots: churn split for each service feature.
fig, axes = plt.subplots(3, 2, figsize=(20, 30))
axes = axes.flatten()
total = len(data_v)  # hoisted out of the loop: invariant across features
for i, feature in enumerate(feature_2):
    sns.countplot(data=data_v, x=feature, hue='Churn', ax=axes[i])
    axes[i].tick_params(axis='x', rotation=45)
    axes[i].legend(title='Churn', loc='upper right')
    axes[i].set_title(f'Churn with {feature}')
    # Annotate each bar with its share of the whole (cleaned) dataset.
    for p in axes[i].patches:
        height = p.get_height()
        axes[i].text(p.get_x() + p.get_width() / 2., height + 50,
                     f'{height/total:.1%}', ha='center')
plt.tight_layout()
plt.show()
# Continuous features: look at each one's distribution per churn class.
feature_3 = ['tenure', 'MonthlyCharges', 'TotalCharges']
for col in feature_3:
    # Step histogram with KDE overlay, split by churn label.
    plt.figure(figsize=(10, 5))
    sns.histplot(data=data_v, x=col, hue='Churn', kde=True, element='step')
    plt.title(f'Distribution of {col} by Churn')
    plt.show()

    # Box plot to surface medians, spread, and outliers per class.
    plt.figure(figsize=(10, 5))
    sns.boxplot(data=data_v, x=col, y="Churn")
    plt.title(f'Box Plot of {col} by Churn')
    plt.show()
Checking for Missing Values¶
Re-inspect the original dataframe's info and null counts. Note this looks at the raw `data` (not the cleaned `data_v`): TotalCharges is still stored as text here, so its blank entries are not counted as nulls by `isna()`.
# Re-inspect the raw frame; TotalCharges is still dtype object at this point.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7043 entries, 0 to 7042 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 customerID 7043 non-null object 1 gender 7043 non-null object 2 SeniorCitizen 7043 non-null int64 3 Partner 7043 non-null object 4 Dependents 7043 non-null object 5 tenure 7043 non-null int64 6 PhoneService 7043 non-null object 7 MultipleLines 7043 non-null object 8 InternetService 7043 non-null object 9 OnlineSecurity 7043 non-null object 10 OnlineBackup 7043 non-null object 11 DeviceProtection 7043 non-null object 12 TechSupport 7043 non-null object 13 StreamingTV 7043 non-null object 14 StreamingMovies 7043 non-null object 15 Contract 7043 non-null object 16 PaperlessBilling 7043 non-null object 17 PaymentMethod 7043 non-null object 18 MonthlyCharges 7043 non-null float64 19 TotalCharges 7043 non-null object 20 Churn 7043 non-null object dtypes: float64(1), int64(2), object(18) memory usage: 1.1+ MB
# Per-column null counts on the raw frame — all zero here because blank
# TotalCharges values are strings, not NaN, until coerced to numeric.
data.isna().sum()
| 0 | |
|---|---|
| customerID | 0 |
| gender | 0 |
| SeniorCitizen | 0 |
| Partner | 0 |
| Dependents | 0 |
| tenure | 0 |
| PhoneService | 0 |
| MultipleLines | 0 |
| InternetService | 0 |
| OnlineSecurity | 0 |
| OnlineBackup | 0 |
| DeviceProtection | 0 |
| TechSupport | 0 |
| StreamingTV | 0 |
| StreamingMovies | 0 |
| Contract | 0 |
| PaperlessBilling | 0 |
| PaymentMethod | 0 |
| MonthlyCharges | 0 |
| TotalCharges | 0 |
| Churn | 0 |
Preprocessing Pipeline¶
Define a preprocessing function to remove unnecessary columns, handle missing values, and prepare data for modeling.
# SciPy (hyper-parameter sampling distributions)
from scipy.stats import loguniform

# scikit-learn
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score, classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, plot_tree

# TensorFlow / Keras (Input added for explicit input-layer declarations)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from keras.optimizers import SGD
def preprocess_data(data):
    """Prepare the churn dataframe for modeling.

    Drops identifier/low-signal columns, coerces TotalCharges to numeric
    (blank strings become NaN and are replaced with the median), standardizes
    the numeric columns, and label-encodes every remaining object column
    (including the 'Churn' target, which becomes 0/1).

    Returns the transformed dataframe. The caller's frame is not mutated:
    `drop` returns a new frame and every later assignment acts on that copy.
    """
    data = data.drop(columns=["customerID", "gender"], axis=1, errors="ignore")
    data["TotalCharges"] = pd.to_numeric(data["TotalCharges"], errors='coerce')
    n_missing = data['TotalCharges'].isna().sum()  # hoisted: computed once
    if n_missing > 0:
        print(f"TotalCharges has {n_missing} null values")
        data['TotalCharges'] = data['TotalCharges'].fillna(data['TotalCharges'].median())
        print(f"TotalCharges now has {data['TotalCharges'].isna().sum()} null values")
    # NOTE(review): LabelEncoder imposes an arbitrary ordinal order on nominal
    # categories; tree models tolerate this, but the linear/SVM/NN models below
    # would likely benefit from one-hot encoding instead.
    encoder = LabelEncoder()
    scaler = StandardScaler()
    num_column = ["tenure", "MonthlyCharges", "TotalCharges"]
    data[num_column] = scaler.fit_transform(data[num_column])
    categorical_column = data.select_dtypes(include=['object']).columns
    for col in categorical_column:
        data[col] = encoder.fit_transform(data[col])
    return data
# Replace the raw frame with the fully preprocessed one used for modeling.
data=preprocess_data(data)
Totalcharge have 11 null value Totalcharge now have 0 null value
Splitting Data into Training and Testing Sets¶
Separate features and target, then split the dataset for model training and evaluation.
# Separate target from features, then hold out 20% for testing
# (fixed seed so every model sees the same split).
y = data["Churn"]
x = data.drop("Churn", axis=1)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Test accuracy of each model, keyed by model name, for the final comparison.
model_accuracy = {}
Logistic Regression Model¶
Train and evaluate a Logistic Regression model as a baseline.
# Baseline: logistic regression. max_iter raised from the default 100 so the
# lbfgs solver can fully converge (100 can stop early with a ConvergenceWarning
# on this feature set; the fitted coefficients are otherwise unchanged once
# converged).
logistic_model = LogisticRegression(max_iter=1000, random_state=42)
logistic_model.fit(x_train, y_train)
y_logistic_pred = logistic_model.predict(x_test)
model_accuracy['Logistic Regression'] = accuracy_score(y_test, y_logistic_pred)
Support Vector Machine (SVM)¶
Train and test a linear SVM classifier.
# Linear-kernel support vector machine baseline; fit and predict in one chain.
svm_model = SVC(kernel='linear')
y_svm_pred = svm_model.fit(x_train, y_train).predict(x_test)
model_accuracy['svm model'] = accuracy_score(y_test, y_svm_pred)
K-Nearest Neighbors (KNN)¶
Train and test a KNN classifier with k=7.
# k-nearest neighbours classifier with k=7.
knn_model = KNeighborsClassifier(n_neighbors=7)
knn_model.fit(x_train, y_train)
y_knn_pred = knn_model.predict(x_test)
# Fix: the KNN accuracy was never recorded, so it was missing from the
# model-comparison bar chart even though it is printed in the final summary.
model_accuracy['knn model'] = accuracy_score(y_test, y_knn_pred)
Decision Tree¶
Build a Decision Tree with entropy criterion and evaluate performance.
# Entropy-criterion decision tree, depth capped at 6 to limit overfitting.
dtree_model = DecisionTreeClassifier(criterion='entropy', max_depth=6, random_state=42)
y_dtree_pred = dtree_model.fit(x_train, y_train).predict(x_test)
model_accuracy['Decision Tree'] = accuracy_score(y_test, y_dtree_pred)
Random Forest¶
Train a Random Forest classifier with tuned hyperparameters.
# Random forest: 200 trees, depth capped at 10.
# (Variable names kept as-is, misspelling included — they are referenced
# in the summary prints below.)
randomforesrt_model = RandomForestClassifier(max_depth=10, n_estimators=200, random_state=42)
y_randomforesrt_pred = randomforesrt_model.fit(x_train, y_train).predict(x_test)
model_accuracy['random forest'] = accuracy_score(y_test, y_randomforesrt_pred)
Deep Learning Model (MLP)¶
Train a Multi-layer Perceptron classifier with multiple hidden layers.
# Scikit-learn multilayer perceptron: three ReLU hidden layers, Adam solver,
# early stopping on the internal validation split (patience 10 iterations).
Dl_model_1 = MLPClassifier(
    hidden_layer_sizes=(128, 64, 32),
    activation='relu',
    solver='adam',
    early_stopping=True,
    n_iter_no_change=10,
    max_iter=500,
    random_state=42,
)
y_pred_Dl_1 = Dl_model_1.fit(x_train, y_train).predict(x_test)
model_accuracy['Dl_model_1 '] = accuracy_score(y_test, y_pred_Dl_1)
Keras Sequential Neural Network¶
Build and compile a simple Keras neural network for churn prediction.
# Simple feed-forward binary classifier: sigmoid output + binary cross-entropy.
# Fix: declare the input with an explicit Input layer instead of passing
# input_shape to the first Dense layer, which Keras now flags with a
# UserWarning (visible in the original training log).
Sequentialmodel_1 = Sequential([
    Input(shape=(x_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])
Sequentialmodel_1.compile(optimizer='SGD', loss='binary_crossentropy', metrics=['accuracy'])
# Stop once validation loss plateaus for 5 epochs and roll back to best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
Sequentialmodel_1.fit(x_train, y_train, epochs=50, batch_size=40, validation_data=(x_test, y_test), callbacks=[early_stopping])
test_loss, test_accuracy = Sequentialmodel_1.evaluate(x_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy}")
model_accuracy['Sequentialmodel_1 '] = test_accuracy
Epoch 1/50
/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(activity_regularizer=activity_regularizer, **kwargs)
141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.6942 - loss: 0.5669 - val_accuracy: 0.7594 - val_loss: 0.4751 Epoch 2/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.7625 - loss: 0.4805 - val_accuracy: 0.7807 - val_loss: 0.4399 Epoch 3/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.7855 - loss: 0.4514 - val_accuracy: 0.7970 - val_loss: 0.4243 Epoch 4/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.7863 - loss: 0.4459 - val_accuracy: 0.7963 - val_loss: 0.4171 Epoch 5/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.7848 - loss: 0.4334 - val_accuracy: 0.8013 - val_loss: 0.4136 Epoch 6/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7886 - loss: 0.4401 - val_accuracy: 0.8048 - val_loss: 0.4114 Epoch 7/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.7983 - loss: 0.4263 - val_accuracy: 0.8112 - val_loss: 0.4097 Epoch 8/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7943 - loss: 0.4234 - val_accuracy: 0.8105 - val_loss: 0.4084 Epoch 9/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8004 - loss: 0.4181 - val_accuracy: 0.8105 - val_loss: 0.4076 Epoch 10/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.7947 - loss: 0.4231 - val_accuracy: 0.8133 - val_loss: 0.4065 Epoch 11/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7983 - loss: 0.4204 - val_accuracy: 0.8148 - val_loss: 0.4058 Epoch 12/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7940 - loss: 0.4302 - val_accuracy: 0.8155 - val_loss: 0.4052 Epoch 13/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.7944 - loss: 0.4261 - val_accuracy: 0.8155 - val_loss: 0.4048 Epoch 14/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.7960 - loss: 0.4207 - val_accuracy: 0.8126 - val_loss: 0.4053 Epoch 15/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8025 - loss: 0.4129 - val_accuracy: 0.8084 - val_loss: 0.4061 Epoch 16/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7961 - loss: 
0.4225 - val_accuracy: 0.8112 - val_loss: 0.4048 Epoch 17/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8034 - loss: 0.4200 - val_accuracy: 0.8141 - val_loss: 0.4037 Epoch 18/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.8117 - loss: 0.4127 - val_accuracy: 0.8126 - val_loss: 0.4048 Epoch 19/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7951 - loss: 0.4156 - val_accuracy: 0.8141 - val_loss: 0.4037 Epoch 20/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.8048 - loss: 0.4047 - val_accuracy: 0.8155 - val_loss: 0.4033 Epoch 21/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.7995 - loss: 0.4116 - val_accuracy: 0.8126 - val_loss: 0.4035 Epoch 22/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8056 - loss: 0.4103 - val_accuracy: 0.8162 - val_loss: 0.4037 Epoch 23/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7996 - loss: 0.4233 - val_accuracy: 0.8162 - val_loss: 0.4031 Epoch 24/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.8125 - loss: 0.4061 - val_accuracy: 0.8133 - val_loss: 0.4041 Epoch 25/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8002 - loss: 0.4176 - val_accuracy: 0.8155 - val_loss: 0.4033 Epoch 26/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.8010 - loss: 0.4180 - val_accuracy: 0.8133 - val_loss: 0.4041 Epoch 27/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8087 - loss: 0.4129 - val_accuracy: 0.8098 - val_loss: 0.4059 Epoch 28/50 141/141 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8023 - loss: 0.4241 - val_accuracy: 0.8183 - val_loss: 0.4034 Test Accuracy: 0.8161816596984863
Second Keras Model with Dropout Regularization¶
Build a deeper two-output softmax network with dropout layers, train it on one-hot-encoded labels, and record its test accuracy. (The accuracy comparison of all models follows in the next cells.)
# Two-class softmax variant trained on one-hot labels.
y_train2 = to_categorical(y_train, num_classes=2)
y_test2 = to_categorical(y_test, num_classes=2)
# Explicit Input layer avoids the Keras input_shape deprecation warning.
Sequentialmodel_2 = Sequential([
    Input(shape=(x_train.shape[1],)),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax')
])
# Fix: a 2-unit softmax with one-hot targets is a categorical problem, so the
# matching loss is 'categorical_crossentropy'. The original used
# 'binary_crossentropy', which treats each of the two outputs as an
# independent Bernoulli and mis-scales the loss.
Sequentialmodel_2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
Sequentialmodel_2.fit(x_train, y_train2, epochs=30, batch_size=20, validation_data=(x_test, y_test2))
test2_loss, test_accuracy2 = Sequentialmodel_2.evaluate(x_test, y_test2, verbose=0)
print(f"Test Accuracy: {test_accuracy2}")
model_accuracy['Sequentialmodel_2 '] = test_accuracy2
Epoch 1/30
/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(activity_regularizer=activity_regularizer, **kwargs)
282/282 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - accuracy: 0.7093 - loss: 0.5650 - val_accuracy: 0.8105 - val_loss: 0.4107 Epoch 2/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.7798 - loss: 0.4638 - val_accuracy: 0.8112 - val_loss: 0.4124 Epoch 3/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.7993 - loss: 0.4363 - val_accuracy: 0.8176 - val_loss: 0.4249 Epoch 4/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.7875 - loss: 0.4483 - val_accuracy: 0.8176 - val_loss: 0.4100 Epoch 5/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7929 - loss: 0.4441 - val_accuracy: 0.8006 - val_loss: 0.4104 Epoch 6/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8015 - loss: 0.4336 - val_accuracy: 0.8119 - val_loss: 0.4222 Epoch 7/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.7923 - loss: 0.4460 - val_accuracy: 0.8077 - val_loss: 0.4066 Epoch 8/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.7960 - loss: 0.4373 - val_accuracy: 0.8112 - val_loss: 0.4059 Epoch 9/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.7979 - loss: 0.4337 - val_accuracy: 0.8133 - val_loss: 0.4045 Epoch 10/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.7950 - loss: 0.4297 - val_accuracy: 0.8062 - val_loss: 0.4077 Epoch 11/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8002 - loss: 0.4346 - val_accuracy: 0.8070 - val_loss: 0.4067 Epoch 12/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.7865 - loss: 0.4415 - val_accuracy: 0.8077 - val_loss: 0.4049 Epoch 13/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.8101 - loss: 0.4275 - val_accuracy: 0.8091 - val_loss: 0.4072 Epoch 14/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.8006 - loss: 0.4282 - val_accuracy: 0.8020 - val_loss: 0.4121 Epoch 15/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - accuracy: 0.8087 - loss: 0.4172 - val_accuracy: 0.8041 - val_loss: 0.4067 Epoch 16/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 5s 14ms/step - accuracy: 0.8009 - loss: 
0.4194 - val_accuracy: 0.8027 - val_loss: 0.4071 Epoch 17/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - accuracy: 0.8073 - loss: 0.4237 - val_accuracy: 0.8062 - val_loss: 0.4093 Epoch 18/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8028 - loss: 0.4222 - val_accuracy: 0.8055 - val_loss: 0.4108 Epoch 19/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8057 - loss: 0.4248 - val_accuracy: 0.8098 - val_loss: 0.4088 Epoch 20/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8053 - loss: 0.4195 - val_accuracy: 0.8091 - val_loss: 0.4234 Epoch 21/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8013 - loss: 0.4292 - val_accuracy: 0.8105 - val_loss: 0.4089 Epoch 22/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7939 - loss: 0.4338 - val_accuracy: 0.8091 - val_loss: 0.4114 Epoch 23/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.8054 - loss: 0.4193 - val_accuracy: 0.8141 - val_loss: 0.4066 Epoch 24/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.8072 - loss: 0.4229 - val_accuracy: 0.8084 - val_loss: 0.4062 Epoch 25/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.7963 - loss: 0.4227 - val_accuracy: 0.8048 - val_loss: 0.4070 Epoch 26/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8129 - loss: 0.4095 - val_accuracy: 0.8062 - val_loss: 0.4112 Epoch 27/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.8089 - loss: 0.4200 - val_accuracy: 0.8070 - val_loss: 0.4092 Epoch 28/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8152 - loss: 0.4216 - val_accuracy: 0.8077 - val_loss: 0.4059 Epoch 29/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.8078 - loss: 0.4057 - val_accuracy: 0.8084 - val_loss: 0.4088 Epoch 30/30 282/282 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.8158 - loss: 0.3995 - val_accuracy: 0.8091 - val_loss: 0.4177 Test Accuracy: 0.8090844750404358
# Print every model's test accuracy (message strings kept byte-for-byte;
# print's default separator reproduces the original spacing exactly).
for label, score in [
    ("the accuracy of the logistic regression model is", accuracy_score(y_test, y_logistic_pred)),
    ("the accuracy of the decision tree model is : ", accuracy_score(y_test, y_dtree_pred)),
    ("the accuracy of the svm model is : ", accuracy_score(y_test, y_svm_pred)),
    ("the accuracy of the random forest model is : ", accuracy_score(y_test, y_randomforesrt_pred)),
    ("the accuracy of the Deep Learning model_1 is : ", accuracy_score(y_test, y_pred_Dl_1)),
    ("the accuracy of the Sequential model_1 is : ", test_accuracy),
    ("the accuracy of the Sequential model_2 is : ", test_accuracy2),
    ("the accuracy of the knn model is : ", accuracy_score(y_test, y_knn_pred)),
]:
    print(label, score)
the accuracy of the logistic regression model is 0.8161816891412349 the accuracy of the decision tree model is : 0.7991483321504613 the accuracy of the svm model is : 0.8190205819730305 the accuracy of the random forest model is : 0.8076650106458482 the accuracy of the Deep Learning model_1 is : 0.8161816891412349 the accuracy of the Sequential model_1 is : 0.8161816596984863 the accuracy of the Sequential model_2 is : 0.8090844750404358 the accuracy of the knn model is : 0.7856635911994322
# Bar chart comparing the recorded model accuracies.
plt.figure(figsize=(10, 6))
model_names = list(model_accuracy.keys())
model_scores = list(model_accuracy.values())
sns.barplot(x=model_names, y=model_scores)
plt.xlabel("Model")
plt.ylabel("Accuracy")
plt.title("Comparison of Model Accuracies")
plt.xticks(rotation=45)
plt.show()
# Randomized hyper-parameter search over SVC. Log-uniform draws for C and
# gamma cover several orders of magnitude; class_weight toggles balanced
# reweighting for the minority (churn) class.
param_distributions = {
    'C': loguniform(1e-3, 1e3),
    'kernel': ['linear', 'rbf', 'sigmoid', 'poly'],
    'gamma': loguniform(1e-4, 1e1),
    'class_weight': [None, 'balanced']
}
svm_model = SVC()
random_search = RandomizedSearchCV(
    estimator=svm_model,
    param_distributions=param_distributions,
    n_iter=25,
    cv=3,
    scoring='accuracy',
    random_state=42,
    n_jobs=-1,  # run the 25 x 3-fold fits in parallel; results are unchanged
)
random_search.fit(x_train, y_train)
print("Best parameters (Randomized Search):", random_search.best_params_)
print("Best score (Randomized Search):", random_search.best_score_)
Best parameters (Randomized Search): {'C': 4.418441521199722, 'class_weight': None, 'gamma': 0.017885301261862014, 'kernel': 'rbf'}
Best score (Randomized Search): 0.7958821441249556