Python - Dimension Reduction - Auto Encoder
Data:
Records of job applicants at the time they applied: GPA, GMAT score, and work experience, with an admitted/rejected label (40 rows)
Mission:
Accuracy comparison between a logistic regression model trained on Auto Encoder features and K-Nearest Neighbours (KNN), Decision Tree, Naïve Bayes, and Support Vector Machine models trained on the original features
Library used:
Pandas
Numpy
Matplotlib
Seaborn
Scikit-learn
Keras
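If the libraries are not installed yet, one pip command covers them (the exact package set is my assumption, since the post does not list versions; Keras also needs the TensorFlow backend):
pip install pandas numpy matplotlib seaborn scikit-learn keras tensorflow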
Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from keras.layers import Input, Dense
from keras.models import Model, Sequential
from keras import regularizers
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
url = 'https://raw.githubusercontent.com/kokocamp/vlog119/main/vlog119.csv'
vlog135 = pd.read_csv(url)
vlog135.describe()
X = vlog135[['gpa','gmat','work_experience']]
y = vlog135['admitted']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25,random_state=0)
# Visualising the original features in 2-D with t-SNE
# ('Gagal' = rejected, 'Lulus' = admitted)
tsne = TSNE(n_components = 2, random_state = 0)
X_tsne = tsne.fit_transform(X)
plt.scatter(X_tsne[np.where(y == 0), 0],
            X_tsne[np.where(y == 0), 1],
            marker = 'o', color = 'y', linewidth = 1,
            alpha = 0.8, label = 'Gagal')
plt.scatter(X_tsne[np.where(y == 1), 0],
            X_tsne[np.where(y == 1), 1],
            marker = 'o', color = 'k', linewidth = 1,
            alpha = 0.8, label = 'Lulus')
plt.legend()
plt.show()
X_skala = MinMaxScaler().fit_transform(X)
X_gagal_skala = X_skala[y == 0]
X_lulus_skala = X_skala[y == 1]
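Scaling first matters here: MinMaxScaler maps every feature into [0, 1], which keeps the tanh and relu units of the autoencoder in their responsive range. A quick check (my addition, not in the original post):
print(X_skala.min(axis = 0), X_skala.max(axis = 0))  # each feature spans [0, 1]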
# Building the Input Layer
input_layer = Input(shape = (X.shape[1], ))
# Building the Encoder network
encoded = Dense(100, activation = 'tanh',
                activity_regularizer = regularizers.l1(10e-5))(input_layer)
encoded = Dense(50, activation = 'tanh',
                activity_regularizer = regularizers.l1(10e-5))(encoded)
encoded = Dense(25, activation = 'tanh',
                activity_regularizer = regularizers.l1(10e-5))(encoded)
encoded = Dense(12, activation = 'tanh',
                activity_regularizer = regularizers.l1(10e-5))(encoded)
encoded = Dense(6, activation = 'relu')(encoded)
# Building the Decoder network
decoded = Dense(12, activation = 'tanh')(encoded)
decoded = Dense(25, activation = 'tanh')(decoded)
decoded = Dense(50, activation = 'tanh')(decoded)
decoded = Dense(100, activation = 'tanh')(decoded)
# Building the Output Layer
output_layer = Dense(X.shape[1], activation = 'relu')(decoded)
# Defining the parameters of the Auto-encoder network
autoencoder = Model(input_layer, output_layer)
autoencoder.compile(optimizer = "adadelta", loss = "mse")
# Training the Auto-encoder network on the admitted ('Lulus') rows only
autoencoder.fit(X_lulus_skala, X_lulus_skala,
                batch_size = 16, epochs = 10,
                shuffle = True, validation_split = 0.25)
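As a quick sanity check (my addition, not part of the original post), you can compare reconstruction error across the two classes; since the network was trained only on admitted applicants, the 'Gagal' rows would ideally reconstruct worse:
# evaluate() returns the configured MSE loss as a single number
print('Reconstruction MSE (Lulus):', autoencoder.evaluate(X_lulus_skala, X_lulus_skala, verbose = 0))
print('Reconstruction MSE (Gagal):', autoencoder.evaluate(X_gagal_skala, X_gagal_skala, verbose = 0))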
# Extracting the encoder half: the input layer plus the first four Dense
# layers, so the output is the 12-unit hidden representation
hidden_representation = Sequential()
hidden_representation.add(autoencoder.layers[0])
hidden_representation.add(autoencoder.layers[1])
hidden_representation.add(autoencoder.layers[2])
hidden_representation.add(autoencoder.layers[3])
hidden_representation.add(autoencoder.layers[4])
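To confirm what the extracted encoder emits (my addition), run one row through it and inspect the shape:
print(hidden_representation.predict(X_skala[:1]).shape)  # expected: (1, 12)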
# Separating the points encoded by the Auto-encoder into rejected ('Gagal') and admitted ('Lulus')
gagal_hidden_rep = hidden_representation.predict(X_gagal_skala)
lulus_hidden_rep = hidden_representation.predict(X_lulus_skala)
# Combining the encoded points into a single table
encoded_X = np.append(gagal_hidden_rep, lulus_hidden_rep, axis = 0)
y_gagal = np.zeros(gagal_hidden_rep.shape[0])
y_lulus = np.ones(lulus_hidden_rep.shape[0])
encoded_y = np.append(y_gagal, y_lulus)
# Plotting the encoded points
tsne = TSNE(n_components = 2, random_state = 0)
X_tsne = tsne.fit_transform(encoded_X)
plt.scatter(X_tsne[np.where(encoded_y == 0), 0],
            X_tsne[np.where(encoded_y == 0), 1],
            marker = 'o', color = 'y', linewidth = 1,
            alpha = 0.8, label = 'Gagal')
plt.scatter(X_tsne[np.where(encoded_y == 1), 0],
            X_tsne[np.where(encoded_y == 1), 1],
            marker = 'o', color = 'k', linewidth = 1,
            alpha = 0.8, label = 'Lulus')
plt.legend()
plt.show()
# Splitting the encoded data for the linear classifier
X_train_encoded, X_test_encoded, y_train_encoded, y_test_encoded = train_test_split(encoded_X, encoded_y, test_size = 0.25, random_state = 0)
# Splitting the original data for the non-linear classifiers (fixed seed so the runs are comparable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
# Building the logistic regression model
logistic_regression = LogisticRegression()
logistic_regression.fit(X_train_encoded, y_train_encoded)
# Storing the predictions of the linear model
y_pred_logistic_regression = logistic_regression.predict(X_test_encoded)
# Evaluating the performance of the linear model
print('Logistic Regression accuracy: ' + str(accuracy_score(y_test_encoded, y_pred_logistic_regression)))
# Building the KNN model
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
# Storing the predictions of the non-linear model
y_pred_knn = knn.predict(X_test)
# Evaluating the performance of the non-linear model
print('KNN accuracy: ' + str(accuracy_score(y_test, y_pred_knn)))
# Building the Decision Tree model
d3 = DecisionTreeClassifier()
d3.fit(X_train, y_train)
# Storing the predictions of the non-linear model
y_pred_d3 = d3.predict(X_test)
# Evaluating the performance of the non-linear model
print('Decision Tree accuracy: ' + str(accuracy_score(y_test, y_pred_d3)))
# Building the Naïve Bayes model
nb = GaussianNB()
nb.fit(X_train, y_train)
# Storing the predictions of the non-linear model
y_pred_nb = nb.predict(X_test)
# Evaluating the performance of the non-linear model
print('Naive Bayes accuracy: ' + str(accuracy_score(y_test, y_pred_nb)))
# Building the SVM model
svm = SVC()
svm.fit(X_train, y_train)
# Storing the predictions of the non-linear model
y_pred_svm = svm.predict(X_test)
# Evaluating the performance of the non-linear model
print('SVM accuracy: ' + str(accuracy_score(y_test, y_pred_svm)))
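To close the comparison, the five scores can be charted side by side. This summary block is my sketch (not in the original post) and puts the imported seaborn to use:
# Collect the accuracies computed above into one table
scores = {
    'LogReg (AE features)': accuracy_score(y_test_encoded, y_pred_logistic_regression),
    'KNN': accuracy_score(y_test, y_pred_knn),
    'Decision Tree': accuracy_score(y_test, y_pred_d3),
    'Naive Bayes': accuracy_score(y_test, y_pred_nb),
    'SVM': accuracy_score(y_test, y_pred_svm),
}
sns.barplot(x = list(scores.keys()), y = list(scores.values()))
plt.ylabel('Accuracy')
plt.xticks(rotation = 45)
plt.show()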
I walk through this scenario in the YouTube video below.
Click this link (http://paparadit.blogspot.com/2020/11/the-algorithms-of-machine-learning.html) if you want to check out other algorithms. Thank you for visiting this blog and subscribing to my channel.
Labels: Programming, Python
PS: If you've benefited from this blog, you can support it by making a small contribution.