import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
print(tf.__version__)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from google.colab import drive
drive.mount('/content/drive')
file = open('/content/drive/My Drive/WISDM_ar_v1.1_raw.txt')
lines = file.readlines()
processedList = []
for i, line in enumerate(lines):
try:
line = line.split(',')
last = line[5].split(';')[0]
last = last.strip()
if last == '':
break;
temp = [line[0], line[1], line[2], line[3], line[4], last]
processedList.append(temp)
except:
print('Error at line number: ', i)
processedList[:10]
columns = ['user', 'activity', 'time', 'x', 'y', 'z']
data = pd.DataFrame(data = processedList, columns = columns)
data.head()
data.shape
data.info()
data.isnull().sum()
data['activity'].value_counts()
#Balancing the Data
data['x'] = data['x'].astype('float')
data['y'] = data['y'].astype('float')
data['z'] = data['z'].astype('float')
data.info()
Fs = 20
activities = data['activity'].value_counts().index
activities
#Plotting the Activities
def plot_activity(activity, data):
fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, figsize=(15, 7), sharex=True)
plot_axis(ax0, data['time'], data['x'], 'X-Axis')
plot_axis(ax1, data['time'], data['y'], 'Y-Axis')
plot_axis(ax2, data['time'], data['z'], 'Z-Axis')
plt.subplots_adjust(hspace=0.2)
fig.suptitle(activity)
plt.subplots_adjust(top=0.90)
plt.show()
def plot_axis(ax, x, y, title):
ax.plot(x, y, 'g')
ax.set_title(title)
ax.xaxis.set_visible(False)
ax.set_ylim([min(y) - np.std(y), max(y) + np.std(y)])
ax.set_xlim([min(x), max(x)])
ax.grid(True)
for activity in activities:
data_for_plot = data[(data['activity'] == activity)][:Fs*10]
plot_activity(activity, data_for_plot)
df = data.drop(['user', 'time'], axis = 1).copy()
df.head()
df['activity'].value_counts()
Walking = df[df['activity']=='Walking'].head(3555).copy()
Jogging = df[df['activity']=='Jogging'].head(3555).copy()
Upstairs = df[df['activity']=='Upstairs'].head(3555).copy()
Downstairs = df[df['activity']=='Downstairs'].head(3555).copy()
Sitting = df[df['activity']=='Sitting'].head(3555).copy()
Standing = df[df['activity']=='Standing'].copy()
balanced_data = pd.DataFrame()
balanced_data = balanced_data.append([Walking, Jogging, Upstairs, Downstairs, Sitting, Standing])
balanced_data.shape
balanced_data['activity'].value_counts()
balanced_data.head()
#Encoding
label = LabelEncoder()
balanced_data['label'] = label.fit_transform(balanced_data['activity'])
balanced_data.head()
label.classes_
#Standardization of Data
X = balanced_data[['x', 'y', 'z']]
y = balanced_data['label']
scaler = StandardScaler()
X = scaler.fit_transform(X)
scaled_X = pd.DataFrame(data = X, columns = ['x', 'y', 'z'])
scaled_X['label'] = y.values
scaled_X.head()
#Creating Frames
import scipy.stats as stats
Fs = 20
frame_size = Fs*4 # 80
hop_size = Fs*2 # 40
def get_frames(df, frame_size, hop_size):
N_FEATURES = 3
frames = []
labels = []
for i in range(0, len(df) - frame_size, hop_size):
x = df['x'].values[i: i + frame_size]
y = df['y'].values[i: i + frame_size]
z = df['z'].values[i: i + frame_size]
# Retrieve the most often used label in this segment
label = stats.mode(df['label'][i: i + frame_size])[0][0]
frames.append([x, y, z])
labels.append(label)
# Bring the segments into a better shape
frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
labels = np.asarray(labels)
return frames, labels
X, y = get_frames(scaled_X, frame_size, hop_size)
X.shape, y.shape
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0, stratify = y)
X_train.shape, X_test.shape
X_train[0].shape, X_test[0].shape
X_train = X_train.reshape(425, 80, 3, 1)
X_test = X_test.reshape(107, 80, 3, 1)
X_train[0].shape, X_test[0].shape
#2D CNN Model
model = Sequential()
model.add(Conv2D(16, (2, 2), activation = 'relu', input_shape = X_train[0].shape))
model.add(Dropout(0.1))
model.add(Conv2D(32, (2, 2), activation='relu'))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(6, activation='softmax'))
model.compile(optimizer=Adam(learning_rate = 0.001), loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
history = model.fit(X_train, y_train, epochs = 10, validation_data= (X_test, y_test), verbose=1)
def plot_learningCurve(history, epochs):
# Plot training & validation accuracy values
epoch_range = range(1, epochs+1)
plt.plot(epoch_range, history.history['accuracy'])
plt.plot(epoch_range, history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()
# Plot training & validation loss values
plt.plot(epoch_range, history.history['loss'])
plt.plot(epoch_range, history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()
plot_learningCurve(history, 10)
#Confusion Matrix
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
y_pred = model.predict_classes(X_test)
mat = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(conf_mat=mat, show_normed=True, figsize=(7,7))