In this project, we find the number of unique values in every feature, find the minimum and maximum values of the numerical features, and plot a histogram of each feature along with a correlation heatmap.
Step:- 1
First, we import the necessary libraries and then load the dataset.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the loan dataset from the CSV file in the working directory.
LOAN = pd.read_csv("LOAN_DATASET.csv")

# NOTE: the bare expressions below only render output when each one is the
# last statement of a notebook cell (presumably how this was first run);
# in a plain script they are evaluated and discarded.
LOAN

# Column names, dtypes and non-null counts.
LOAN.info()

# Total number of missing cells across the whole frame, then per column.
print(f'Total no of empty values: {LOAN.isna().sum().sum()}')
LOAN.isna().sum()

# Summary statistics of the columns.
LOAN.describe()
Step:- 2
Finding the number of unique values in every feature
def get_unq(LOAN):
    """Print the number of distinct values in each column of a DataFrame.

    One line is printed per column, formatted as ``<column> - <count>``.
    Missing values are counted as a distinct value.

    Args:
        LOAN: pandas DataFrame to inspect.
    """
    for column in LOAN.columns:
        # dropna=False keeps NaN in the tally, matching len(Series.unique()).
        print(f'{column} - {LOAN[column].nunique(dropna=False)}')
# Print the distinct-value count of every column in the loaded dataset.
get_unq(LOAN)
Step:- 3
Finding the minimum and maximum values of the numerical features
def min_max(LOAN):
    """Print the value range of every non-object column of a DataFrame.

    One line is printed per column, formatted as
    ``<column> -> <min> to <max>``. Columns whose dtype is ``object`` are
    skipped.

    Args:
        LOAN: pandas DataFrame to inspect.
    """
    for column in LOAN.columns:
        values = LOAN[column]
        if values.dtype == 'object':
            continue
        # min()/max() ignore NaN; sorting a list that contains NaN does not
        # produce a reliable order, so its first/last items can be wrong.
        print(f'{column} -> {values.min()} to {values.max()}')
# Print the minimum and maximum of every non-object column in the loaded dataset.
min_max(LOAN)
Step:- 4
# Label encoder for categorical data: replace each 'purpose' category with
# an integer code.
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
# Assign the encoded array directly. A DataFrame value is aligned on the
# index during assignment, which can introduce NaN when LOAN's index is not
# the default 0..n-1 range.
LOAN['purpose'] = encoder.fit_transform(LOAN['purpose'])
Now we plot the graphs: a histogram of each feature, split by `not.fully.paid`.
def histplo(df, target='not.fully.paid'):
    """Plot a histogram of every column of ``df``, coloured by ``target``.

    A new 5x7 figure is opened for each plotted column. The ``target``
    column itself is skipped, since it is used as the hue of every plot.

    Args:
        df: pandas DataFrame whose columns are plotted.
        target: Name of the column used as ``hue``; it is not plotted.
    """
    # Iterate the frame that was passed in, not the module-level LOAN.
    for column in df.columns:
        if column == target:
            # Skip before opening a figure so no empty figure is left behind.
            continue
        plt.figure(figsize=(5, 7))
        sns.histplot(data=df, x=column, bins=30, kde=True, hue=target)
# Draw one histogram per feature, coloured by 'not.fully.paid'.
histplo(LOAN)

# Annotated heatmap of the pairwise correlations between columns.
plt.figure(figsize=(20, 13))
# numeric_only=True restricts corr() to numeric columns, so a column that
# still holds strings does not make the call raise.
sns.heatmap(LOAN.corr(numeric_only=True), linewidths=0.5, annot=True)
Submitted by Subhojit Jalal (Subhojit1234)
Download packets of source code on Coders Packet
Comments