Linear discriminant analysis in Python

Linear discriminant analysis (LDA) is a dimensionality reduction technique for pattern recognition and classification. It transforms the data into a lower dimensional space, maximizing the separation between classes, thus facilitating the distinction of different data categories.

The following steps demonstrate the process of training and visualizing an LDA model using the provided dataset.

Step 1 - Importing the libraries

In the first step, we import the necessary libraries.

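(The interactive code widget for this step failed to load. The imports it would have shown are the first three lines of main.py below: `import numpy as np`, `import matplotlib.pyplot as plt`, and `import pandas as pd`.)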

# Visualise the training-set results: the classifier's decision regions drawn
# as filled contours, with the training points scattered on top.
# Relies on np, plt, model, label1_train and label2_train already being defined.
from matplotlib.colors import ListedColormap

seq1, seq2 = label1_train, label2_train

# Build both colormaps once instead of on every loop iteration.
region_cmap = ListedColormap(('lightblue', 'peachpuff', 'mistyrose'))
point_cmap = ListedColormap(('mediumturquoise', 'lightsalmon', 'lightcoral'))

# Dense grid spanning the two plotted columns, padded by one unit on each side.
grid1, grid2 = np.meshgrid(
    np.arange(start=seq1[:, 0].min() - 1, stop=seq1[:, 0].max() + 1, step=0.01),
    np.arange(start=seq1[:, 1].min() - 1, stop=seq1[:, 1].max() + 1, step=0.01),
)

# Predict a class for every grid node, then fold the result back to grid shape.
grid_points = np.column_stack((grid1.ravel(), grid2.ravel()))
regions = model.predict(grid_points).reshape(grid1.shape)
plt.contourf(grid1, grid2, regions, alpha=0.75, cmap=region_cmap)
plt.xlim(grid1.min(), grid1.max())
plt.ylim(grid2.min(), grid2.max())

for key, value in enumerate(np.unique(seq2)):
    members = seq2 == value
    # Use `color=` rather than `c=`: matplotlib's scatter documentation warns
    # that a single RGBA sequence passed as `c` is indistinguishable from an
    # array of values to colormap, which bites when a class has four points.
    plt.scatter(seq1[members, 0], seq1[members, 1],
                color=point_cmap(key), label=value)

plt.title('Training set')
plt.xlabel('LDA1')
plt.ylabel('LDA2')
plt.legend()
plt.savefig('output/1_training.png')
plt.show()

# Visualise the test-set results: the classifier's decision regions drawn as
# filled contours, with the held-out test points scattered on top.
# Relies on np, plt, model, label1_test and label2_test already being defined.
from matplotlib.colors import ListedColormap

seq1, seq2 = label1_test, label2_test

# Build both colormaps once instead of on every loop iteration.
region_cmap = ListedColormap(('lightblue', 'peachpuff', 'mistyrose'))
point_cmap = ListedColormap(('mediumturquoise', 'lightsalmon', 'lightcoral'))

# Dense grid spanning the two plotted columns, padded by one unit on each side.
grid1, grid2 = np.meshgrid(
    np.arange(start=seq1[:, 0].min() - 1, stop=seq1[:, 0].max() + 1, step=0.01),
    np.arange(start=seq1[:, 1].min() - 1, stop=seq1[:, 1].max() + 1, step=0.01),
)

# Predict a class for every grid node, then fold the result back to grid shape.
grid_points = np.column_stack((grid1.ravel(), grid2.ravel()))
regions = model.predict(grid_points).reshape(grid1.shape)
plt.contourf(grid1, grid2, regions, alpha=0.75, cmap=region_cmap)
plt.xlim(grid1.min(), grid1.max())
plt.ylim(grid2.min(), grid2.max())

for key, value in enumerate(np.unique(seq2)):
    members = seq2 == value
    # Use `color=` rather than `c=`: matplotlib's scatter documentation warns
    # that a single RGBA sequence passed as `c` is indistinguishable from an
    # array of values to colormap, which bites when a class has four points.
    plt.scatter(seq1[members, 0], seq1[members, 1],
                color=point_cmap(key), label=value)

plt.title('Test set')
plt.xlabel('LDA1')
plt.ylabel('LDA2')
plt.legend()
plt.savefig('output/2_testing.png')
plt.show()

main.py

Data.csv

"""Train a logistic-regression classifier on LDA-reduced data and plot it.

Pipeline: load ``Data.csv``, split into training and test sets, standardise
the features, project them onto two linear discriminants, fit a logistic
regression, print the confusion matrix and accuracy for the test set, then
save and show decision-region plots for both splits.
"""
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import ListedColormap
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fill colours for the predicted regions and the matching marker colours.
REGION_COLORS = ('lightblue', 'peachpuff', 'mistyrose')
POINT_COLORS = ('mediumturquoise', 'lightsalmon', 'lightcoral')


def _plot_decision_regions(classifier, features, labels, title, output_path,
                           step=0.01):
    """Draw the classifier's decision regions with the given points on top.

    Args:
        classifier: Fitted estimator whose ``predict`` accepts an (n, 2) array.
        features: 2-D array; columns 0 and 1 are used as the x and y axes.
        labels: 1-D array of class labels aligned with ``features`` rows.
        title: Plot title.
        output_path: Where the figure is saved; missing parent directories
            are created so ``savefig`` does not fail on a fresh checkout.
        step: Spacing of the grid on which the regions are evaluated.
    """
    region_cmap = ListedColormap(REGION_COLORS)
    point_cmap = ListedColormap(POINT_COLORS)

    # Dense grid spanning both axes, padded by one unit on each side.
    grid1, grid2 = np.meshgrid(
        np.arange(start=features[:, 0].min() - 1,
                  stop=features[:, 0].max() + 1, step=step),
        np.arange(start=features[:, 1].min() - 1,
                  stop=features[:, 1].max() + 1, step=step),
    )
    # Predict a class for every grid node, then fold back to the grid shape.
    grid_points = np.column_stack((grid1.ravel(), grid2.ravel()))
    regions = classifier.predict(grid_points).reshape(grid1.shape)
    plt.contourf(grid1, grid2, regions, alpha=0.75, cmap=region_cmap)
    plt.xlim(grid1.min(), grid1.max())
    plt.ylim(grid2.min(), grid2.max())

    for key, value in enumerate(np.unique(labels)):
        members = labels == value
        # Use `color=` rather than `c=`: matplotlib's scatter documentation
        # warns that a single RGBA sequence passed as `c` is indistinguishable
        # from an array of values to colormap (e.g. a class with four points).
        plt.scatter(features[members, 0], features[members, 1],
                    color=point_cmap(key), label=value)

    plt.title(title)
    plt.xlabel('LDA1')
    plt.ylabel('LDA2')
    plt.legend()
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path)
    plt.show()


# Load the dataset. Despite the names, label1 holds the feature matrix
# (columns 0-12) and label2 holds the class labels (column 13).
items = pd.read_csv('Data.csv')
label1 = items.iloc[:, 0:13].values
label2 = items.iloc[:, 13].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
label1_train, label1_test, label2_train, label2_test = train_test_split(
    label1, label2, test_size=0.2, random_state=0)

# Fit the scaler on the training split only, then apply it to the test split.
scaling = StandardScaler()
label1_train = scaling.fit_transform(label1_train)
label1_test = scaling.transform(label1_test)

# Reduce the scaled features to two linear discriminants. LDA is fitted with
# the training labels, and the test split is only transformed.
analysis = LDA(n_components=2)
label1_train = analysis.fit_transform(label1_train, label2_train)
label1_test = analysis.transform(label1_test)

# Fit the classifier on the two discriminants and predict the test split.
model = LogisticRegression(random_state=0)
model.fit(label1_train, label2_train)
prediction = model.predict(label1_test)

# Report the confusion matrix and accuracy on the test split.
matrix = confusion_matrix(label2_test, prediction)
print(matrix)
score = accuracy_score(label2_test, prediction)
print(score)

_plot_decision_regions(model, label1_train, label2_train,
                       'Training set', 'output/1_training.png')
# Clear the current figure so the test plot is not drawn over the training one.
plt.clf()
_plot_decision_regions(model, label1_test, label2_test,
                       'Test set', 'output/2_testing.png')

Linear discriminant analysis in Python

Step 1 - Importing the libraries

Step 2 - Importing the dataset

Step 3 - Splitting the dataset into training and test set

Step 4 - Feature scaling

Step 5 - Applying LDA

Step 6 - Fitting the model to the training dataset

Step 7 - Predicting the test set results

Step 8 - The visualization of training set results

Step 9 - The visualization of test set results

Code

Conclusion